StatePropagatorDataGpu object to manage GPU forces, positions and velocities buffers
[gromacs.git] / src / gromacs / domdec / domdec.cpp
blobf3365e11a37900fbfe3cf8e79b62d2f310461bb4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include "domdec.h"
40 #include "config.h"
42 #include <cassert>
43 #include <cinttypes>
44 #include <climits>
45 #include <cmath>
46 #include <cstdio>
47 #include <cstdlib>
48 #include <cstring>
50 #include <algorithm>
51 #include <memory>
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
103 #include "atomdistribution.h"
104 #include "box.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
112 #include "utility.h"
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
122 #define DD_CGIBS 2
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_FLAG_NRCG 65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
133 /* The non-bonded zone-pair setup for domain decomposition
134 * The first number is the i-zone, the second number the first j-zone seen by
135 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136 * As is, this is for 3D decomposition, where there are 4 i-zones.
137 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
140 static const int
141 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
142 {1, 3, 6},
143 {2, 5, 6},
144 {3, 5, 7}};
148 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
150 static void index2xyz(ivec nc,int ind,ivec xyz)
152 xyz[XX] = ind % nc[XX];
153 xyz[YY] = (ind / nc[XX]) % nc[YY];
154 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162 xyz[ZZ] = ind % nc[ZZ];
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
167 int ddnodeid = -1;
169 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170 const int ddindex = dd_index(dd->nc, c);
171 if (cartSetup.bCartesianPP_PME)
173 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175 else if (cartSetup.bCartesianPP)
177 #if GMX_MPI
178 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
179 #endif
181 else
183 ddnodeid = ddindex;
186 return ddnodeid;
189 int ddglatnr(const gmx_domdec_t *dd, int i)
191 int atnr;
193 if (dd == nullptr)
195 atnr = i + 1;
197 else
199 if (i >= dd->comm->atomRanges.numAtomsTotal())
201 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
203 atnr = dd->globalAtomIndices[i] + 1;
206 return atnr;
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
211 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
217 int i;
219 if (state->ddp_count != dd->ddp_count)
221 gmx_incons("The MD state does not match the domain decomposition state");
224 state->cg_gl.resize(dd->ncg_home);
225 for (i = 0; i < dd->ncg_home; i++)
227 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
230 state->ddp_count_cg_gl = dd->ddp_count;
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
235 return &dd->comm->zones;
238 int dd_numAtomsZones(const gmx_domdec_t &dd)
240 return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
243 int dd_numHomeAtoms(const gmx_domdec_t &dd)
245 return dd.comm->atomRanges.numHomeAtoms();
248 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
250 /* We currently set mdatoms entries for all atoms:
251 * local + non-local + communicated for vsite + constraints
254 return dd->comm->atomRanges.numAtomsTotal();
257 int dd_natoms_vsite(const gmx_domdec_t *dd)
259 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
262 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
264 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
265 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
268 void dd_move_x(gmx_domdec_t *dd,
269 const matrix box,
270 gmx::ArrayRef<gmx::RVec> x,
271 gmx_wallcycle *wcycle)
273 wallcycle_start(wcycle, ewcMOVEX);
275 int nzone, nat_tot;
276 gmx_domdec_comm_t *comm;
277 gmx_domdec_comm_dim_t *cd;
278 rvec shift = {0, 0, 0};
279 gmx_bool bPBC, bScrew;
281 comm = dd->comm;
283 nzone = 1;
284 nat_tot = comm->atomRanges.numHomeAtoms();
285 for (int d = 0; d < dd->ndim; d++)
287 bPBC = (dd->ci[dd->dim[d]] == 0);
288 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
289 if (bPBC)
291 copy_rvec(box[dd->dim[d]], shift);
293 cd = &comm->cd[d];
294 for (const gmx_domdec_ind_t &ind : cd->ind)
296 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
297 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
298 int n = 0;
299 if (!bPBC)
301 for (int j : ind.index)
303 sendBuffer[n] = x[j];
304 n++;
307 else if (!bScrew)
309 for (int j : ind.index)
311 /* We need to shift the coordinates */
312 for (int d = 0; d < DIM; d++)
314 sendBuffer[n][d] = x[j][d] + shift[d];
316 n++;
319 else
321 for (int j : ind.index)
323 /* Shift x */
324 sendBuffer[n][XX] = x[j][XX] + shift[XX];
325 /* Rotate y and z.
326 * This operation requires a special shift force
327 * treatment, which is performed in calc_vir.
329 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
330 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
331 n++;
335 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
337 gmx::ArrayRef<gmx::RVec> receiveBuffer;
338 if (cd->receiveInPlace)
340 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
342 else
344 receiveBuffer = receiveBufferAccess.buffer;
346 /* Send and receive the coordinates */
347 ddSendrecv(dd, d, dddirBackward,
348 sendBuffer, receiveBuffer);
350 if (!cd->receiveInPlace)
352 int j = 0;
353 for (int zone = 0; zone < nzone; zone++)
355 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
357 x[i] = receiveBuffer[j++];
361 nat_tot += ind.nrecv[nzone+1];
363 nzone += nzone;
366 wallcycle_stop(wcycle, ewcMOVEX);
369 void dd_move_f(gmx_domdec_t *dd,
370 gmx::ForceWithShiftForces *forceWithShiftForces,
371 gmx_wallcycle *wcycle)
373 wallcycle_start(wcycle, ewcMOVEF);
375 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
376 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
378 gmx_domdec_comm_t &comm = *dd->comm;
379 int nzone = comm.zones.n/2;
380 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
381 for (int d = dd->ndim-1; d >= 0; d--)
383 /* Only forces in domains near the PBC boundaries need to
384 consider PBC in the treatment of fshift */
385 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
386 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
387 /* Determine which shift vector we need */
388 ivec vis = { 0, 0, 0 };
389 vis[dd->dim[d]] = 1;
390 const int is = IVEC2IS(vis);
392 /* Loop over the pulses */
393 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
394 for (int p = cd.numPulses() - 1; p >= 0; p--)
396 const gmx_domdec_ind_t &ind = cd.ind[p];
397 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
398 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
400 nat_tot -= ind.nrecv[nzone+1];
402 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
404 gmx::ArrayRef<gmx::RVec> sendBuffer;
405 if (cd.receiveInPlace)
407 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
409 else
411 sendBuffer = sendBufferAccess.buffer;
412 int j = 0;
413 for (int zone = 0; zone < nzone; zone++)
415 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
417 sendBuffer[j++] = f[i];
421 /* Communicate the forces */
422 ddSendrecv(dd, d, dddirForward,
423 sendBuffer, receiveBuffer);
424 /* Add the received forces */
425 int n = 0;
426 if (!shiftForcesNeedPbc)
428 for (int j : ind.index)
430 for (int d = 0; d < DIM; d++)
432 f[j][d] += receiveBuffer[n][d];
434 n++;
437 else if (!applyScrewPbc)
439 for (int j : ind.index)
441 for (int d = 0; d < DIM; d++)
443 f[j][d] += receiveBuffer[n][d];
445 /* Add this force to the shift force */
446 for (int d = 0; d < DIM; d++)
448 fshift[is][d] += receiveBuffer[n][d];
450 n++;
453 else
455 for (int j : ind.index)
457 /* Rotate the force */
458 f[j][XX] += receiveBuffer[n][XX];
459 f[j][YY] -= receiveBuffer[n][YY];
460 f[j][ZZ] -= receiveBuffer[n][ZZ];
461 if (shiftForcesNeedPbc)
463 /* Add this force to the shift force */
464 for (int d = 0; d < DIM; d++)
466 fshift[is][d] += receiveBuffer[n][d];
469 n++;
473 nzone /= 2;
475 wallcycle_stop(wcycle, ewcMOVEF);
478 /* Convenience function for extracting a real buffer from an rvec buffer
480 * To reduce the number of temporary communication buffers and avoid
481 * cache polution, we reuse gmx::RVec buffers for storing reals.
482 * This functions return a real buffer reference with the same number
483 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
485 static gmx::ArrayRef<real>
486 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
488 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
489 arrayRef.size());
492 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
494 int nzone, nat_tot;
495 gmx_domdec_comm_t *comm;
496 gmx_domdec_comm_dim_t *cd;
498 comm = dd->comm;
500 nzone = 1;
501 nat_tot = comm->atomRanges.numHomeAtoms();
502 for (int d = 0; d < dd->ndim; d++)
504 cd = &comm->cd[d];
505 for (const gmx_domdec_ind_t &ind : cd->ind)
507 /* Note: We provision for RVec instead of real, so a factor of 3
508 * more than needed. The buffer actually already has this size
509 * and we pass a plain pointer below, so this does not matter.
511 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
512 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
513 int n = 0;
514 for (int j : ind.index)
516 sendBuffer[n++] = v[j];
519 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
521 gmx::ArrayRef<real> receiveBuffer;
522 if (cd->receiveInPlace)
524 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
526 else
528 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
530 /* Send and receive the data */
531 ddSendrecv(dd, d, dddirBackward,
532 sendBuffer, receiveBuffer);
533 if (!cd->receiveInPlace)
535 int j = 0;
536 for (int zone = 0; zone < nzone; zone++)
538 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
540 v[i] = receiveBuffer[j++];
544 nat_tot += ind.nrecv[nzone+1];
546 nzone += nzone;
550 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
552 int nzone, nat_tot;
553 gmx_domdec_comm_t *comm;
554 gmx_domdec_comm_dim_t *cd;
556 comm = dd->comm;
558 nzone = comm->zones.n/2;
559 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
560 for (int d = dd->ndim-1; d >= 0; d--)
562 cd = &comm->cd[d];
563 for (int p = cd->numPulses() - 1; p >= 0; p--)
565 const gmx_domdec_ind_t &ind = cd->ind[p];
567 /* Note: We provision for RVec instead of real, so a factor of 3
568 * more than needed. The buffer actually already has this size
569 * and we typecast, so this works as intended.
571 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
572 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
573 nat_tot -= ind.nrecv[nzone + 1];
575 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
577 gmx::ArrayRef<real> sendBuffer;
578 if (cd->receiveInPlace)
580 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
582 else
584 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
585 int j = 0;
586 for (int zone = 0; zone < nzone; zone++)
588 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
590 sendBuffer[j++] = v[i];
594 /* Communicate the forces */
595 ddSendrecv(dd, d, dddirForward,
596 sendBuffer, receiveBuffer);
597 /* Add the received forces */
598 int n = 0;
599 for (int j : ind.index)
601 v[j] += receiveBuffer[n];
602 n++;
605 nzone /= 2;
609 real dd_cutoff_multibody(const gmx_domdec_t *dd)
611 const gmx_domdec_comm_t &comm = *dd->comm;
612 const DDSystemInfo &systemInfo = comm.systemInfo;
614 real r = -1;
615 if (systemInfo.haveInterDomainMultiBodyBondeds)
617 if (comm.cutoff_mbody > 0)
619 r = comm.cutoff_mbody;
621 else
623 /* cutoff_mbody=0 means we do not have DLB */
624 r = comm.cellsize_min[dd->dim[0]];
625 for (int di = 1; di < dd->ndim; di++)
627 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
629 if (comm.systemInfo.filterBondedCommunication)
631 r = std::max(r, comm.cutoff_mbody);
633 else
635 r = std::min(r, systemInfo.cutoff);
640 return r;
643 real dd_cutoff_twobody(const gmx_domdec_t *dd)
645 real r_mb;
647 r_mb = dd_cutoff_multibody(dd);
649 return std::max(dd->comm->systemInfo.cutoff, r_mb);
653 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
654 const CartesianRankSetup &cartSetup,
655 const ivec coord,
656 ivec coord_pme)
658 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
659 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
660 copy_ivec(coord, coord_pme);
661 coord_pme[cartSetup.cartpmedim] =
662 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
665 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
666 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
667 const int ddCellIndex)
669 const int npp = ddRankSetup.numPPRanks;
670 const int npme = ddRankSetup.numRanksDoingPme;
672 /* Here we assign a PME node to communicate with this DD node
673 * by assuming that the major index of both is x.
674 * We add npme/2 to obtain an even distribution.
676 return (ddCellIndex*npme + npme/2)/npp;
679 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
681 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
683 int n = 0;
684 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
686 const int p0 = ddindex2pmeindex(ddRankSetup, i);
687 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
688 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
690 if (debug)
692 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
694 pmeRanks[n] = i + 1 + n;
695 n++;
699 return pmeRanks;
702 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
704 gmx_domdec_t *dd;
705 ivec coords;
706 int slab;
708 dd = cr->dd;
710 if (dd->comm->bCartesian) {
711 gmx_ddindex2xyz(dd->nc,ddindex,coords);
712 dd_coords2pmecoords(dd,coords,coords_pme);
713 copy_ivec(dd->ntot,nc);
714 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
715 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
717 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
718 } else {
719 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
722 coords[XX] = x;
723 coords[YY] = y;
724 coords[ZZ] = z;
725 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
727 return slab;
730 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
732 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
733 ivec coords = { x, y, z };
734 int nodeid = -1;
736 if (cartSetup.bCartesianPP_PME)
738 #if GMX_MPI
739 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
740 #endif
742 else
744 int ddindex = dd_index(cr->dd->nc, coords);
745 if (cartSetup.bCartesianPP)
747 nodeid = cartSetup.ddindex2simnodeid[ddindex];
749 else
751 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
753 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
755 else
757 nodeid = ddindex;
762 return nodeid;
765 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
766 const CartesianRankSetup &cartSetup,
767 gmx::ArrayRef<const int> pmeRanks,
768 const t_commrec gmx_unused *cr,
769 const int sim_nodeid)
771 int pmenode = -1;
773 /* This assumes a uniform x domain decomposition grid cell size */
774 if (cartSetup.bCartesianPP_PME)
776 #if GMX_MPI
777 ivec coord, coord_pme;
778 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
779 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
781 /* This is a PP rank */
782 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
783 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
785 #endif
787 else if (cartSetup.bCartesianPP)
789 if (sim_nodeid < ddRankSetup.numPPRanks)
791 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
794 else
796 /* This assumes DD cells with identical x coordinates
797 * are numbered sequentially.
799 if (pmeRanks.empty())
801 if (sim_nodeid < ddRankSetup.numPPRanks)
803 /* The DD index equals the nodeid */
804 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
807 else
809 int i = 0;
810 while (sim_nodeid > pmeRanks[i])
812 i++;
814 if (sim_nodeid < pmeRanks[i])
816 pmenode = pmeRanks[i];
821 return pmenode;
824 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
826 if (dd != nullptr)
828 return {
829 dd->comm->ddRankSetup.npmenodes_x,
830 dd->comm->ddRankSetup.npmenodes_y
833 else
835 return {
836 1, 1
841 std::vector<int> get_pme_ddranks(const t_commrec *cr,
842 const int pmenodeid)
844 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
845 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
846 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
847 std::vector<int> ddranks;
848 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
850 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
852 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
854 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
856 if (cartSetup.bCartesianPP_PME)
858 ivec coord = { x, y, z };
859 ivec coord_pme;
860 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
861 if (cr->dd->ci[XX] == coord_pme[XX] &&
862 cr->dd->ci[YY] == coord_pme[YY] &&
863 cr->dd->ci[ZZ] == coord_pme[ZZ])
865 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
868 else
870 /* The slab corresponds to the nodeid in the PME group */
871 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
873 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
879 return ddranks;
882 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
883 gmx::ArrayRef<const int> pmeRanks,
884 const t_commrec *cr)
886 gmx_bool bReceive = TRUE;
888 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
889 if (ddRankSetup.usePmeOnlyRanks)
891 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
892 if (cartSetup.bCartesianPP_PME)
894 #if GMX_MPI
895 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
896 ivec coords;
897 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
898 coords[cartSetup.cartpmedim]++;
899 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
901 int rank;
902 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
903 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
905 /* This is not the last PP node for pmenode */
906 bReceive = FALSE;
909 #else
910 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
911 #endif
913 else
915 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
916 if (cr->sim_nodeid+1 < cr->nnodes &&
917 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
919 /* This is not the last PP node for pmenode */
920 bReceive = FALSE;
925 return bReceive;
928 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
930 gmx_domdec_comm_t *comm;
931 int i;
933 comm = dd->comm;
935 snew(*dim_f, dd->nc[dim]+1);
936 (*dim_f)[0] = 0;
937 for (i = 1; i < dd->nc[dim]; i++)
939 if (comm->slb_frac[dim])
941 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
943 else
945 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
948 (*dim_f)[dd->nc[dim]] = 1;
951 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
953 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
955 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
957 ddpme->dim = YY;
959 else
961 ddpme->dim = dimind;
963 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
965 ddpme->nslab = (ddpme->dim == 0 ?
966 ddRankSetup.npmenodes_x :
967 ddRankSetup.npmenodes_y);
969 if (ddpme->nslab <= 1)
971 return;
974 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
975 /* Determine for each PME slab the PP location range for dimension dim */
976 snew(ddpme->pp_min, ddpme->nslab);
977 snew(ddpme->pp_max, ddpme->nslab);
978 for (int slab = 0; slab < ddpme->nslab; slab++)
980 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
981 ddpme->pp_max[slab] = 0;
983 for (int i = 0; i < dd->nnodes; i++)
985 ivec xyz;
986 ddindex2xyz(dd->nc, i, xyz);
987 /* For y only use our y/z slab.
988 * This assumes that the PME x grid size matches the DD grid size.
990 if (dimind == 0 || xyz[XX] == dd->ci[XX])
992 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
993 int slab;
994 if (dimind == 0)
996 slab = pmeindex/nso;
998 else
1000 slab = pmeindex % ddpme->nslab;
1002 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1003 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1007 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1010 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1012 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1014 if (ddRankSetup.ddpme[0].dim == XX)
1016 return ddRankSetup.ddpme[0].maxshift;
1018 else
1020 return 0;
1024 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1026 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1028 if (ddRankSetup.ddpme[0].dim == YY)
1030 return ddRankSetup.ddpme[0].maxshift;
1032 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1034 return ddRankSetup.ddpme[1].maxshift;
1036 else
1038 return 0;
1042 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1044 return dd.comm->systemInfo.haveSplitConstraints;
1047 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1049 /* Note that the cycles value can be incorrect, either 0 or some
1050 * extremely large value, when our thread migrated to another core
1051 * with an unsynchronized cycle counter. If this happens less often
1052 * that once per nstlist steps, this will not cause issues, since
1053 * we later subtract the maximum value from the sum over nstlist steps.
1054 * A zero count will slightly lower the total, but that's a small effect.
1055 * Note that the main purpose of the subtraction of the maximum value
1056 * is to avoid throwing off the load balancing when stalls occur due
1057 * e.g. system activity or network congestion.
1059 dd->comm->cycl[ddCycl] += cycles;
1060 dd->comm->cycl_n[ddCycl]++;
1061 if (cycles > dd->comm->cycl_max[ddCycl])
1063 dd->comm->cycl_max[ddCycl] = cycles;
1067 #if GMX_MPI
1068 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1070 MPI_Comm c_row;
1071 int dim, i, rank;
1072 ivec loc_c;
1073 gmx_bool bPartOfGroup = FALSE;
1075 dim = dd->dim[dim_ind];
1076 copy_ivec(loc, loc_c);
1077 for (i = 0; i < dd->nc[dim]; i++)
1079 loc_c[dim] = i;
1080 rank = dd_index(dd->nc, loc_c);
1081 if (rank == dd->rank)
1083 /* This process is part of the group */
1084 bPartOfGroup = TRUE;
1087 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1088 &c_row);
1089 if (bPartOfGroup)
1091 dd->comm->mpi_comm_load[dim_ind] = c_row;
1092 if (!isDlbDisabled(dd->comm))
1094 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1096 if (dd->ci[dim] == dd->master_ci[dim])
1098 /* This is the root process of this row */
1099 cellsizes.rowMaster = std::make_unique<RowMaster>();
1101 RowMaster &rowMaster = *cellsizes.rowMaster;
1102 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1103 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1104 rowMaster.isCellMin.resize(dd->nc[dim]);
1105 if (dim_ind > 0)
1107 rowMaster.bounds.resize(dd->nc[dim]);
1109 rowMaster.buf_ncd.resize(dd->nc[dim]);
1111 else
1113 /* This is not a root process, we only need to receive cell_f */
1114 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1117 if (dd->ci[dim] == dd->master_ci[dim])
1119 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1123 #endif
1125 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1126 int gpu_id)
1128 #if GMX_MPI
1129 int physicalnode_id_hash;
1130 gmx_domdec_t *dd;
1131 MPI_Comm mpi_comm_pp_physicalnode;
1133 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1135 /* Only ranks with short-ranged tasks (currently) use GPUs.
1136 * If we don't have GPUs assigned, there are no resources to share.
1138 return;
1141 physicalnode_id_hash = gmx_physicalnode_id_hash();
1143 dd = cr->dd;
1145 if (debug)
1147 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1148 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1149 dd->rank, physicalnode_id_hash, gpu_id);
1151 /* Split the PP communicator over the physical nodes */
1152 /* TODO: See if we should store this (before), as it's also used for
1153 * for the nodecomm summation.
1155 // TODO PhysicalNodeCommunicator could be extended/used to handle
1156 // the need for per-node per-group communicators.
1157 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1158 &mpi_comm_pp_physicalnode);
1159 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1160 &dd->comm->mpi_comm_gpu_shared);
1161 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1162 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1164 if (debug)
1166 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1169 /* Note that some ranks could share a GPU, while others don't */
1171 if (dd->comm->nrank_gpu_shared == 1)
1173 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1175 #else
1176 GMX_UNUSED_VALUE(cr);
1177 GMX_UNUSED_VALUE(gpu_id);
1178 #endif
1181 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1183 #if GMX_MPI
1184 int dim0, dim1, i, j;
1185 ivec loc;
1187 if (debug)
1189 fprintf(debug, "Making load communicators\n");
1192 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1193 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1195 if (dd->ndim == 0)
1197 return;
1200 clear_ivec(loc);
1201 make_load_communicator(dd, 0, loc);
1202 if (dd->ndim > 1)
1204 dim0 = dd->dim[0];
1205 for (i = 0; i < dd->nc[dim0]; i++)
1207 loc[dim0] = i;
1208 make_load_communicator(dd, 1, loc);
1211 if (dd->ndim > 2)
1213 dim0 = dd->dim[0];
1214 for (i = 0; i < dd->nc[dim0]; i++)
1216 loc[dim0] = i;
1217 dim1 = dd->dim[1];
1218 for (j = 0; j < dd->nc[dim1]; j++)
1220 loc[dim1] = j;
1221 make_load_communicator(dd, 2, loc);
1226 if (debug)
1228 fprintf(debug, "Finished making load communicators\n");
1230 #endif
1233 /*! \brief Sets up the relation between neighboring domains and zones */
1234 static void setup_neighbor_relations(gmx_domdec_t *dd)
1236 int d, dim, i, j, m;
1237 ivec tmp, s;
1238 gmx_domdec_zones_t *zones;
1239 gmx_domdec_ns_ranges_t *izone;
1240 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1242 for (d = 0; d < dd->ndim; d++)
1244 dim = dd->dim[d];
1245 copy_ivec(dd->ci, tmp);
1246 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1247 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1248 copy_ivec(dd->ci, tmp);
1249 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1250 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1251 if (debug)
1253 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1254 dd->rank, dim,
1255 dd->neighbor[d][0],
1256 dd->neighbor[d][1]);
1260 int nzone = (1 << dd->ndim);
1261 int nizone = (1 << std::max(dd->ndim - 1, 0));
1262 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1264 zones = &dd->comm->zones;
1266 for (i = 0; i < nzone; i++)
1268 m = 0;
1269 clear_ivec(zones->shift[i]);
1270 for (d = 0; d < dd->ndim; d++)
1272 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1276 zones->n = nzone;
1277 for (i = 0; i < nzone; i++)
1279 for (d = 0; d < DIM; d++)
1281 s[d] = dd->ci[d] - zones->shift[i][d];
1282 if (s[d] < 0)
1284 s[d] += dd->nc[d];
1286 else if (s[d] >= dd->nc[d])
1288 s[d] -= dd->nc[d];
1292 zones->nizone = nizone;
1293 for (i = 0; i < zones->nizone; i++)
1295 assert(ddNonbondedZonePairRanges[i][0] == i);
1297 izone = &zones->izone[i];
1298 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1299 * j-zones up to nzone.
1301 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1302 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1303 for (dim = 0; dim < DIM; dim++)
1305 if (dd->nc[dim] == 1)
1307 /* All shifts should be allowed */
1308 izone->shift0[dim] = -1;
1309 izone->shift1[dim] = 1;
1311 else
1313 /* Determine the min/max j-zone shift wrt the i-zone */
1314 izone->shift0[dim] = 1;
1315 izone->shift1[dim] = -1;
1316 for (j = izone->j0; j < izone->j1; j++)
1318 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1319 if (shift_diff < izone->shift0[dim])
1321 izone->shift0[dim] = shift_diff;
1323 if (shift_diff > izone->shift1[dim])
1325 izone->shift1[dim] = shift_diff;
1332 if (!isDlbDisabled(dd->comm))
1334 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1337 if (dd->comm->ddSettings.recordLoad)
1339 make_load_communicators(dd);
1343 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1344 gmx_domdec_t *dd,
1345 t_commrec gmx_unused *cr,
1346 bool gmx_unused reorder)
1348 #if GMX_MPI
1349 gmx_domdec_comm_t *comm = dd->comm;
1350 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1352 if (cartSetup.bCartesianPP)
1354 /* Set up cartesian communication for the particle-particle part */
1355 GMX_LOG(mdlog.info).appendTextFormatted(
1356 "Will use a Cartesian communicator: %d x %d x %d",
1357 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1359 ivec periods;
1360 for (int i = 0; i < DIM; i++)
1362 periods[i] = TRUE;
1364 MPI_Comm comm_cart;
1365 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1366 &comm_cart);
1367 /* We overwrite the old communicator with the new cartesian one */
1368 cr->mpi_comm_mygroup = comm_cart;
1371 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1372 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1374 if (cartSetup.bCartesianPP_PME)
1376 /* Since we want to use the original cartesian setup for sim,
1377 * and not the one after split, we need to make an index.
1379 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1380 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1381 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1382 /* Get the rank of the DD master,
1383 * above we made sure that the master node is a PP node.
1385 int rank;
1386 if (MASTER(cr))
1388 rank = dd->rank;
1390 else
1392 rank = 0;
1394 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1396 else if (cartSetup.bCartesianPP)
1398 if (!comm->ddRankSetup.usePmeOnlyRanks)
1400 /* The PP communicator is also
1401 * the communicator for this simulation
1403 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1405 cr->nodeid = dd->rank;
1407 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1409 /* We need to make an index to go from the coordinates
1410 * to the nodeid of this simulation.
1412 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1413 std::vector<int> buf(dd->nnodes);
1414 if (thisRankHasDuty(cr, DUTY_PP))
1416 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1418 /* Communicate the ddindex to simulation nodeid index */
1419 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1420 cr->mpi_comm_mysim);
1422 /* Determine the master coordinates and rank.
1423 * The DD master should be the same node as the master of this sim.
1425 for (int i = 0; i < dd->nnodes; i++)
1427 if (cartSetup.ddindex2simnodeid[i] == 0)
1429 ddindex2xyz(dd->nc, i, dd->master_ci);
1430 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1433 if (debug)
1435 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1438 else
1440 /* No Cartesian communicators */
1441 /* We use the rank in dd->comm->all as DD index */
1442 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1443 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1444 dd->masterrank = 0;
1445 clear_ivec(dd->master_ci);
1447 #endif
1449 GMX_LOG(mdlog.info).appendTextFormatted(
1450 "Domain decomposition rank %d, coordinates %d %d %d\n",
1451 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1452 if (debug)
1454 fprintf(debug,
1455 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1456 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1460 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1461 t_commrec *cr)
1463 #if GMX_MPI
1464 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1466 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1468 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1469 std::vector<int> buf(dd->nnodes);
1470 if (thisRankHasDuty(cr, DUTY_PP))
1472 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1474 /* Communicate the ddindex to simulation nodeid index */
1475 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1476 cr->mpi_comm_mysim);
1478 #else
1479 GMX_UNUSED_VALUE(dd);
1480 GMX_UNUSED_VALUE(cr);
1481 #endif
1484 static CartesianRankSetup
1485 split_communicator(const gmx::MDLogger &mdlog,
1486 t_commrec *cr,
1487 const DdRankOrder ddRankOrder,
1488 bool gmx_unused reorder,
1489 const DDRankSetup &ddRankSetup,
1490 ivec ddCellIndex,
1491 std::vector<int> *pmeRanks)
1493 CartesianRankSetup cartSetup;
1495 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1496 cartSetup.bCartesianPP_PME = false;
1498 const ivec &numDDCells = ddRankSetup.numPPCells;
1499 /* Initially we set ntot to the number of PP cells */
1500 copy_ivec(numDDCells, cartSetup.ntot);
1502 if (cartSetup.bCartesianPP)
1504 const int numDDCellsTot = ddRankSetup.numPPRanks;
1505 bool bDiv[DIM];
1506 for (int i = 1; i < DIM; i++)
1508 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1510 if (bDiv[YY] || bDiv[ZZ])
1512 cartSetup.bCartesianPP_PME = TRUE;
1513 /* If we have 2D PME decomposition, which is always in x+y,
1514 * we stack the PME only nodes in z.
1515 * Otherwise we choose the direction that provides the thinnest slab
1516 * of PME only nodes as this will have the least effect
1517 * on the PP communication.
1518 * But for the PME communication the opposite might be better.
1520 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1521 !bDiv[YY] ||
1522 numDDCells[YY] > numDDCells[ZZ]))
1524 cartSetup.cartpmedim = ZZ;
1526 else
1528 cartSetup.cartpmedim = YY;
1530 cartSetup.ntot[cartSetup.cartpmedim]
1531 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1533 else
1535 GMX_LOG(mdlog.info).appendTextFormatted(
1536 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1537 ddRankSetup.numRanksDoingPme,
1538 numDDCells[XX], numDDCells[YY],
1539 numDDCells[XX], numDDCells[ZZ]);
1540 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1544 if (cartSetup.bCartesianPP_PME)
1546 #if GMX_MPI
1547 int rank;
1548 ivec periods;
1550 GMX_LOG(mdlog.info).appendTextFormatted(
1551 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1552 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1554 for (int i = 0; i < DIM; i++)
1556 periods[i] = TRUE;
1558 MPI_Comm comm_cart;
1559 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1560 &comm_cart);
1561 MPI_Comm_rank(comm_cart, &rank);
1562 if (MASTER(cr) && rank != 0)
1564 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1567 /* With this assigment we loose the link to the original communicator
1568 * which will usually be MPI_COMM_WORLD, unless have multisim.
1570 cr->mpi_comm_mysim = comm_cart;
1571 cr->sim_nodeid = rank;
1573 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1575 GMX_LOG(mdlog.info).appendTextFormatted(
1576 "Cartesian rank %d, coordinates %d %d %d\n",
1577 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1579 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1581 cr->duty = DUTY_PP;
1583 if (!ddRankSetup.usePmeOnlyRanks ||
1584 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1586 cr->duty = DUTY_PME;
1589 /* Split the sim communicator into PP and PME only nodes */
1590 MPI_Comm_split(cr->mpi_comm_mysim,
1591 getThisRankDuties(cr),
1592 dd_index(cartSetup.ntot, ddCellIndex),
1593 &cr->mpi_comm_mygroup);
1594 #else
1595 GMX_UNUSED_VALUE(ddCellIndex);
1596 #endif
1598 else
1600 switch (ddRankOrder)
1602 case DdRankOrder::pp_pme:
1603 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1604 break;
1605 case DdRankOrder::interleave:
1606 /* Interleave the PP-only and PME-only ranks */
1607 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1608 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1609 break;
1610 case DdRankOrder::cartesian:
1611 break;
1612 default:
1613 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1616 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1618 cr->duty = DUTY_PME;
1620 else
1622 cr->duty = DUTY_PP;
1624 #if GMX_MPI
1625 /* Split the sim communicator into PP and PME only nodes */
1626 MPI_Comm_split(cr->mpi_comm_mysim,
1627 getThisRankDuties(cr),
1628 cr->nodeid,
1629 &cr->mpi_comm_mygroup);
1630 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1631 #endif
1634 GMX_LOG(mdlog.info).appendTextFormatted(
1635 "This rank does only %s work.\n",
1636 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1638 return cartSetup;
1641 /*! \brief Makes the PP communicator and the PME communicator, when needed
1643 * Returns the Cartesian rank setup.
1644 * Sets \p cr->mpi_comm_mygroup
1645 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1646 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1648 static CartesianRankSetup
1649 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1650 const DDSettings &ddSettings,
1651 const DdRankOrder ddRankOrder,
1652 const DDRankSetup &ddRankSetup,
1653 t_commrec *cr,
1654 ivec ddCellIndex,
1655 std::vector<int> *pmeRanks)
1657 CartesianRankSetup cartSetup;
1659 if (ddRankSetup.usePmeOnlyRanks)
1661 /* Split the communicator into a PP and PME part */
1662 cartSetup =
1663 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1664 ddRankSetup, ddCellIndex, pmeRanks);
1666 else
1668 /* All nodes do PP and PME */
1669 /* We do not require separate communicators */
1670 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1672 cartSetup.bCartesianPP = false;
1673 cartSetup.bCartesianPP_PME = false;
1676 return cartSetup;
1679 /*! \brief For PP ranks, sets or makes the communicator
1681 * For PME ranks get the rank id.
1682 * For PP only ranks, sets the PME-only rank.
1684 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1685 const DDSettings &ddSettings,
1686 gmx::ArrayRef<const int> pmeRanks,
1687 t_commrec *cr,
1688 const int numAtomsInSystem,
1689 gmx_domdec_t *dd)
1691 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1692 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1694 if (thisRankHasDuty(cr, DUTY_PP))
1696 /* Copy or make a new PP communicator */
1698 /* We (possibly) reordered the nodes in split_communicator,
1699 * so it is no longer required in make_pp_communicator.
1701 const bool useCartesianReorder =
1702 (ddSettings.useCartesianReorder &&
1703 !cartSetup.bCartesianPP_PME);
1705 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1707 else
1709 receive_ddindex2simnodeid(dd, cr);
1712 if (!thisRankHasDuty(cr, DUTY_PME))
1714 /* Set up the commnuication to our PME node */
1715 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1716 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1717 if (debug)
1719 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1720 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1723 else
1725 dd->pme_nodeid = -1;
1728 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1729 if (MASTER(cr))
1731 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1732 numAtomsInSystem,
1733 numAtomsInSystem);
1737 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1738 const char *dir, int nc, const char *size_string)
1740 real *slb_frac, tot;
1741 int i, n;
1742 double dbl;
1744 slb_frac = nullptr;
1745 if (nc > 1 && size_string != nullptr)
1747 GMX_LOG(mdlog.info).appendTextFormatted(
1748 "Using static load balancing for the %s direction", dir);
1749 snew(slb_frac, nc);
1750 tot = 0;
1751 for (i = 0; i < nc; i++)
1753 dbl = 0;
1754 sscanf(size_string, "%20lf%n", &dbl, &n);
1755 if (dbl == 0)
1757 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1759 slb_frac[i] = dbl;
1760 size_string += n;
1761 tot += slb_frac[i];
1763 /* Normalize */
1764 std::string relativeCellSizes = "Relative cell sizes:";
1765 for (i = 0; i < nc; i++)
1767 slb_frac[i] /= tot;
1768 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1770 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1773 return slb_frac;
1776 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1778 int n = 0;
1779 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1780 int nmol;
1781 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1783 for (auto &ilist : extractILists(*ilists, IF_BOND))
1785 if (NRAL(ilist.functionType) > 2)
1787 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1792 return n;
1795 static int dd_getenv(const gmx::MDLogger &mdlog,
1796 const char *env_var, int def)
1798 char *val;
1799 int nst;
1801 nst = def;
1802 val = getenv(env_var);
1803 if (val)
1805 if (sscanf(val, "%20d", &nst) <= 0)
1807 nst = 1;
1809 GMX_LOG(mdlog.info).appendTextFormatted(
1810 "Found env.var. %s = %s, using value %d",
1811 env_var, val, nst);
1814 return nst;
1817 static void check_dd_restrictions(const gmx_domdec_t *dd,
1818 const t_inputrec *ir,
1819 const gmx::MDLogger &mdlog)
1821 if (ir->ePBC == epbcSCREW &&
1822 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1824 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1827 if (ir->nstlist == 0)
1829 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1832 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1834 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1838 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1839 const ivec numDomains)
1841 real r = ddbox.box_size[XX];
1842 for (int d = 0; d < DIM; d++)
1844 if (numDomains[d] > 1)
1846 /* Check using the initial average cell size */
1847 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1851 return r;
1854 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1856 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1857 const std::string &reasonStr,
1858 const gmx::MDLogger &mdlog)
1860 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1861 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1863 if (cmdlineDlbState == DlbState::onUser)
1865 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1867 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1869 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1871 return DlbState::offForever;
1874 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1876 * This function parses the parameters of "-dlb" command line option setting
1877 * corresponding state values. Then it checks the consistency of the determined
1878 * state with other run parameters and settings. As a result, the initial state
1879 * may be altered or an error may be thrown if incompatibility of options is detected.
1881 * \param [in] mdlog Logger.
1882 * \param [in] dlbOption Enum value for the DLB option.
1883 * \param [in] bRecordLoad True if the load balancer is recording load information.
1884 * \param [in] mdrunOptions Options for mdrun.
1885 * \param [in] ir Pointer mdrun to input parameters.
1886 * \returns DLB initial/startup state.
1888 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1889 DlbOption dlbOption, gmx_bool bRecordLoad,
1890 const gmx::MdrunOptions &mdrunOptions,
1891 const t_inputrec *ir)
1893 DlbState dlbState = DlbState::offCanTurnOn;
1895 switch (dlbOption)
1897 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1898 case DlbOption::no: dlbState = DlbState::offUser; break;
1899 case DlbOption::yes: dlbState = DlbState::onUser; break;
1900 default: gmx_incons("Invalid dlbOption enum value");
1903 /* Reruns don't support DLB: bail or override auto mode */
1904 if (mdrunOptions.rerun)
1906 std::string reasonStr = "it is not supported in reruns.";
1907 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1910 /* Unsupported integrators */
1911 if (!EI_DYNAMICS(ir->eI))
1913 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1914 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1917 /* Without cycle counters we can't time work to balance on */
1918 if (!bRecordLoad)
1920 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1921 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1924 if (mdrunOptions.reproducible)
1926 std::string reasonStr = "you started a reproducible run.";
1927 switch (dlbState)
1929 case DlbState::offUser:
1930 break;
1931 case DlbState::offForever:
1932 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1933 break;
1934 case DlbState::offCanTurnOn:
1935 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1936 case DlbState::onCanTurnOff:
1937 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1938 break;
1939 case DlbState::onUser:
1940 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1941 default:
1942 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1946 return dlbState;
1949 static gmx_domdec_comm_t *init_dd_comm()
1951 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1953 comm->n_load_have = 0;
1954 comm->n_load_collect = 0;
1956 comm->haveTurnedOffDlb = false;
1958 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1960 comm->sum_nat[i] = 0;
1962 comm->ndecomp = 0;
1963 comm->nload = 0;
1964 comm->load_step = 0;
1965 comm->load_sum = 0;
1966 comm->load_max = 0;
1967 clear_ivec(comm->load_lim);
1968 comm->load_mdf = 0;
1969 comm->load_pme = 0;
1971 /* This should be replaced by a unique pointer */
1972 comm->balanceRegion = ddBalanceRegionAllocate();
1974 return comm;
1977 /* Returns whether mtop contains constraints and/or vsites */
1978 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1980 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1981 int nmol;
1982 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1984 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1986 return true;
1990 return false;
1993 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1994 const gmx_mtop_t &mtop,
1995 const t_inputrec &inputrec,
1996 const real cutoffMargin,
1997 DDSystemInfo *systemInfo)
1999 /* When we have constraints and/or vsites, it is beneficial to use
2000 * update groups (when possible) to allow independent update of groups.
2002 if (!systemHasConstraintsOrVsites(mtop))
2004 /* No constraints or vsites, atoms can be updated independently */
2005 return;
2008 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2009 systemInfo->useUpdateGroups =
2010 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2011 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2013 if (systemInfo->useUpdateGroups)
2015 int numUpdateGroups = 0;
2016 for (const auto &molblock : mtop.molblock)
2018 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2021 systemInfo->maxUpdateGroupRadius =
2022 computeMaxUpdateGroupRadius(mtop,
2023 systemInfo->updateGroupingPerMoleculetype,
2024 maxReferenceTemperature(inputrec));
2026 /* To use update groups, the large domain-to-domain cutoff distance
2027 * should be compatible with the box size.
2029 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2031 if (systemInfo->useUpdateGroups)
2033 GMX_LOG(mdlog.info).appendTextFormatted(
2034 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2035 numUpdateGroups,
2036 mtop.natoms/static_cast<double>(numUpdateGroups),
2037 systemInfo->maxUpdateGroupRadius);
2039 else
2041 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2042 systemInfo->updateGroupingPerMoleculetype.clear();
2047 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2048 npbcdim(ePBC2npbcdim(ir.ePBC)),
2049 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2050 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2051 haveScrewPBC(ir.ePBC == epbcSCREW)
2055 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2056 static bool
2057 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2058 const bool useUpdateGroups,
2059 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2061 if (useUpdateGroups)
2063 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2064 "Need one grouping per moltype");
2065 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2067 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2069 return false;
2073 else
2075 for (const auto &moltype : mtop.moltype)
2077 if (moltype.atoms.nr > 1)
2079 return false;
2084 return true;
2087 /*! \brief Generate the simulation system information */
2088 static DDSystemInfo
2089 getSystemInfo(const gmx::MDLogger &mdlog,
2090 const t_commrec *cr,
2091 const DomdecOptions &options,
2092 const gmx_mtop_t &mtop,
2093 const t_inputrec &ir,
2094 const matrix box,
2095 gmx::ArrayRef<const gmx::RVec> xGlobal)
2097 const real tenPercentMargin = 1.1;
2099 DDSystemInfo systemInfo;
2101 /* We need to decide on update groups early, as this affects communication distances */
2102 systemInfo.useUpdateGroups = false;
2103 if (ir.cutoff_scheme == ecutsVERLET)
2105 real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2106 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2109 systemInfo.moleculesAreAlwaysWhole =
2110 moleculesAreAlwaysWhole(mtop,
2111 systemInfo.useUpdateGroups,
2112 systemInfo.updateGroupingPerMoleculetype);
2113 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2114 mtop.bIntermolecularInteractions);
2115 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2116 multi_body_bondeds_count(&mtop) > 0);
2118 if (systemInfo.useUpdateGroups)
2120 systemInfo.haveSplitConstraints = false;
2121 systemInfo.haveSplitSettles = false;
2123 else
2125 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2126 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2127 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2130 if (ir.rlist == 0)
2132 /* Set the cut-off to some very large value,
2133 * so we don't need if statements everywhere in the code.
2134 * We use sqrt, since the cut-off is squared in some places.
2136 systemInfo.cutoff = GMX_CUTOFF_INF;
2138 else
2140 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2142 systemInfo.minCutoffForMultiBody = 0;
2144 /* Determine the minimum cell size limit, affected by many factors */
2145 systemInfo.cellsizeLimit = 0;
2146 systemInfo.filterBondedCommunication = false;
2148 /* We do not allow home atoms to move beyond the neighboring domain
2149 * between domain decomposition steps, which limits the cell size.
2150 * Get an estimate of cell size limit due to atom displacement.
2151 * In most cases this is a large overestimate, because it assumes
2152 * non-interaction atoms.
2153 * We set the chance to 1 in a trillion steps.
2155 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2156 const real limitForAtomDisplacement =
2157 minCellSizeForAtomDisplacement(mtop, ir,
2158 systemInfo.updateGroupingPerMoleculetype,
2159 c_chanceThatAtomMovesBeyondDomain);
2160 GMX_LOG(mdlog.info).appendTextFormatted(
2161 "Minimum cell size due to atom displacement: %.3f nm",
2162 limitForAtomDisplacement);
2164 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2165 limitForAtomDisplacement);
2167 /* TODO: PME decomposition currently requires atoms not to be more than
2168 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2169 * In nearly all cases, limitForAtomDisplacement will be smaller
2170 * than 2/3*rlist, so the PME requirement is satisfied.
2171 * But it would be better for both correctness and performance
2172 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2173 * Note that we would need to improve the pairlist buffer case.
2176 if (systemInfo.haveInterDomainBondeds)
2178 if (options.minimumCommunicationRange > 0)
2180 systemInfo.minCutoffForMultiBody =
2181 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2182 if (options.useBondedCommunication)
2184 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2186 else
2188 systemInfo.cutoff = std::max(systemInfo.cutoff,
2189 systemInfo.minCutoffForMultiBody);
2192 else if (ir.bPeriodicMols)
2194 /* Can not easily determine the required cut-off */
2195 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2196 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2198 else
2200 real r_2b, r_mb;
2202 if (MASTER(cr))
2204 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2205 options.checkBondedInteractions,
2206 &r_2b, &r_mb);
2208 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2209 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2211 /* We use an initial margin of 10% for the minimum cell size,
2212 * except when we are just below the non-bonded cut-off.
2214 if (options.useBondedCommunication)
2216 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2218 const real r_bonded = std::max(r_2b, r_mb);
2219 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2220 /* This is the (only) place where we turn on the filtering */
2221 systemInfo.filterBondedCommunication = true;
2223 else
2225 const real r_bonded = r_mb;
2226 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2227 systemInfo.cutoff);
2229 /* We determine cutoff_mbody later */
2230 systemInfo.increaseMultiBodyCutoff = true;
2232 else
2234 /* No special bonded communication,
2235 * simply increase the DD cut-off.
2237 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2238 systemInfo.cutoff = std::max(systemInfo.cutoff,
2239 systemInfo.minCutoffForMultiBody);
2242 GMX_LOG(mdlog.info).appendTextFormatted(
2243 "Minimum cell size due to bonded interactions: %.3f nm",
2244 systemInfo.minCutoffForMultiBody);
2246 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2247 systemInfo.minCutoffForMultiBody);
2250 systemInfo.constraintCommunicationRange = 0;
2251 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2253 /* There is a cell size limit due to the constraints (P-LINCS) */
2254 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2255 GMX_LOG(mdlog.info).appendTextFormatted(
2256 "Estimated maximum distance required for P-LINCS: %.3f nm",
2257 systemInfo.constraintCommunicationRange);
2258 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2260 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2263 else if (options.constraintCommunicationRange > 0)
2265 /* Here we do not check for dd->splitConstraints.
2266 * because one can also set a cell size limit for virtual sites only
2267 * and at this point we don't know yet if there are intercg v-sites.
2269 GMX_LOG(mdlog.info).appendTextFormatted(
2270 "User supplied maximum distance required for P-LINCS: %.3f nm",
2271 options.constraintCommunicationRange);
2272 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2274 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2275 systemInfo.constraintCommunicationRange);
2277 return systemInfo;
2280 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2281 * implemented. */
2282 static void
2283 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2284 const t_commrec *cr,
2285 const DomdecOptions &options,
2286 const DDSettings &ddSettings,
2287 const DDSystemInfo &systemInfo,
2288 const real cellsizeLimit,
2289 const gmx_ddbox_t &ddbox)
2291 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2293 char buf[STRLEN];
2294 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2295 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2296 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2297 !bC ? "-rdd" : "-rcon",
2298 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2299 bC ? " or your LINCS settings" : "");
2301 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2302 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2303 "%s\n"
2304 "Look in the log file for details on the domain decomposition",
2305 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2306 cellsizeLimit,
2307 buf);
2310 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2311 if (acs < cellsizeLimit)
2313 if (options.numCells[XX] <= 0)
2315 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2317 else
2319 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2320 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2321 acs, cellsizeLimit);
2325 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2326 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2328 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2329 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2330 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2332 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2334 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2335 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2339 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2340 static DDRankSetup
2341 getDDRankSetup(const gmx::MDLogger &mdlog,
2342 t_commrec *cr,
2343 const DDGridSetup &ddGridSetup,
2344 const t_inputrec &ir)
2346 GMX_LOG(mdlog.info).appendTextFormatted(
2347 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2348 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2349 ddGridSetup.numPmeOnlyRanks);
2351 DDRankSetup ddRankSetup;
2353 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2354 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2356 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2357 if (ddRankSetup.usePmeOnlyRanks)
2359 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2361 else
2363 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2366 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2368 /* The following choices should match those
2369 * in comm_cost_est in domdec_setup.c.
2370 * Note that here the checks have to take into account
2371 * that the decomposition might occur in a different order than xyz
2372 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2373 * in which case they will not match those in comm_cost_est,
2374 * but since that is mainly for testing purposes that's fine.
2376 if (ddGridSetup.numDDDimensions >= 2 &&
2377 ddGridSetup.ddDimensions[0] == XX &&
2378 ddGridSetup.ddDimensions[1] == YY &&
2379 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2380 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2381 getenv("GMX_PMEONEDD") == nullptr)
2383 ddRankSetup.npmedecompdim = 2;
2384 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2385 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2387 else
2389 /* In case nc is 1 in both x and y we could still choose to
2390 * decompose pme in y instead of x, but we use x for simplicity.
2392 ddRankSetup.npmedecompdim = 1;
2393 if (ddGridSetup.ddDimensions[0] == YY)
2395 ddRankSetup.npmenodes_x = 1;
2396 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2398 else
2400 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2401 ddRankSetup.npmenodes_y = 1;
2404 GMX_LOG(mdlog.info).appendTextFormatted(
2405 "PME domain decomposition: %d x %d x %d",
2406 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2408 else
2410 ddRankSetup.npmedecompdim = 0;
2411 ddRankSetup.npmenodes_x = 0;
2412 ddRankSetup.npmenodes_y = 0;
2415 return ddRankSetup;
2418 /*! \brief Set the cell size and interaction limits */
2419 static void set_dd_limits(const gmx::MDLogger &mdlog,
2420 t_commrec *cr, gmx_domdec_t *dd,
2421 const DomdecOptions &options,
2422 const DDSettings &ddSettings,
2423 const DDSystemInfo &systemInfo,
2424 const DDGridSetup &ddGridSetup,
2425 const int numPPRanks,
2426 const gmx_mtop_t *mtop,
2427 const t_inputrec *ir,
2428 const gmx_ddbox_t &ddbox)
2430 gmx_domdec_comm_t *comm = dd->comm;
2431 comm->ddSettings = ddSettings;
2433 /* Initialize to GPU share count to 0, might change later */
2434 comm->nrank_gpu_shared = 0;
2436 comm->dlbState = comm->ddSettings.initialDlbState;
2437 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2438 /* To consider turning DLB on after 2*nstlist steps we need to check
2439 * at partitioning count 3. Thus we need to increase the first count by 2.
2441 comm->ddPartioningCountFirstDlbOff += 2;
2443 comm->bPMELoadBalDLBLimits = FALSE;
2445 /* Allocate the charge group/atom sorting struct */
2446 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2448 comm->systemInfo = systemInfo;
2450 if (systemInfo.useUpdateGroups)
2452 /* Note: We would like to use dd->nnodes for the atom count estimate,
2453 * but that is not yet available here. But this anyhow only
2454 * affect performance up to the second dd_partition_system call.
2456 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2457 comm->updateGroupsCog =
2458 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2459 systemInfo.updateGroupingPerMoleculetype,
2460 maxReferenceTemperature(*ir),
2461 homeAtomCountEstimate);
2464 /* Set the DD setup given by ddGridSetup */
2465 copy_ivec(ddGridSetup.numDomains, dd->nc);
2466 dd->ndim = ddGridSetup.numDDDimensions;
2467 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2469 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2471 snew(comm->slb_frac, DIM);
2472 if (isDlbDisabled(comm))
2474 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2475 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2476 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2479 /* Set the multi-body cut-off and cellsize limit for DLB */
2480 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2481 comm->cellsize_limit = systemInfo.cellsizeLimit;
2482 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2484 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2486 /* Set the bonded communication distance to halfway
2487 * the minimum and the maximum,
2488 * since the extra communication cost is nearly zero.
2490 real acs = average_cellsize_min(ddbox, dd->nc);
2491 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2492 if (!isDlbDisabled(comm))
2494 /* Check if this does not limit the scaling */
2495 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2496 options.dlbScaling*acs);
2498 if (!systemInfo.filterBondedCommunication)
2500 /* Without bBondComm do not go beyond the n.b. cut-off */
2501 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2502 if (comm->cellsize_limit >= systemInfo.cutoff)
2504 /* We don't loose a lot of efficieny
2505 * when increasing it to the n.b. cut-off.
2506 * It can even be slightly faster, because we need
2507 * less checks for the communication setup.
2509 comm->cutoff_mbody = systemInfo.cutoff;
2512 /* Check if we did not end up below our original limit */
2513 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2514 systemInfo.minCutoffForMultiBody);
2516 if (comm->cutoff_mbody > comm->cellsize_limit)
2518 comm->cellsize_limit = comm->cutoff_mbody;
2521 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2524 if (debug)
2526 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2527 "cellsize limit %f\n",
2528 gmx::boolToString(systemInfo.filterBondedCommunication),
2529 comm->cellsize_limit);
2532 if (MASTER(cr))
2534 check_dd_restrictions(dd, ir, mdlog);
2538 void dd_init_bondeds(FILE *fplog,
2539 gmx_domdec_t *dd,
2540 const gmx_mtop_t *mtop,
2541 const gmx_vsite_t *vsite,
2542 const t_inputrec *ir,
2543 gmx_bool bBCheck,
2544 cginfo_mb_t *cginfo_mb)
2546 gmx_domdec_comm_t *comm;
2548 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2550 comm = dd->comm;
2552 if (comm->systemInfo.filterBondedCommunication)
2554 /* Communicate atoms beyond the cut-off for bonded interactions */
2555 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2557 else
2559 /* Only communicate atoms based on cut-off */
2560 comm->bondedLinks = nullptr;
2564 static void writeSettings(gmx::TextWriter *log,
2565 gmx_domdec_t *dd,
2566 const gmx_mtop_t *mtop,
2567 const t_inputrec *ir,
2568 gmx_bool bDynLoadBal,
2569 real dlb_scale,
2570 const gmx_ddbox_t *ddbox)
2572 gmx_domdec_comm_t *comm;
2573 int d;
2574 ivec np;
2575 real limit, shrink;
2577 comm = dd->comm;
2579 if (bDynLoadBal)
2581 log->writeString("The maximum number of communication pulses is:");
2582 for (d = 0; d < dd->ndim; d++)
2584 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2586 log->ensureLineBreak();
2587 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2588 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2589 log->writeString("The allowed shrink of domain decomposition cells is:");
2590 for (d = 0; d < DIM; d++)
2592 if (dd->nc[d] > 1)
2594 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2596 shrink = 0;
2598 else
2600 shrink =
2601 comm->cellsize_min_dlb[d]/
2602 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2604 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2607 log->ensureLineBreak();
2609 else
2611 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2612 log->writeString("The initial number of communication pulses is:");
2613 for (d = 0; d < dd->ndim; d++)
2615 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2617 log->ensureLineBreak();
2618 log->writeString("The initial domain decomposition cell size is:");
2619 for (d = 0; d < DIM; d++)
2621 if (dd->nc[d] > 1)
2623 log->writeStringFormatted(" %c %.2f nm",
2624 dim2char(d), dd->comm->cellsize_min[d]);
2627 log->ensureLineBreak();
2628 log->writeLine();
2631 const bool haveInterDomainVsites =
2632 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2634 if (comm->systemInfo.haveInterDomainBondeds ||
2635 haveInterDomainVsites ||
2636 comm->systemInfo.haveSplitConstraints ||
2637 comm->systemInfo.haveSplitSettles)
2639 std::string decompUnits;
2640 if (comm->systemInfo.useUpdateGroups)
2642 decompUnits = "atom groups";
2644 else
2646 decompUnits = "atoms";
2649 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2650 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2652 if (bDynLoadBal)
2654 limit = dd->comm->cellsize_limit;
2656 else
2658 if (dd->unitCellInfo.ddBoxIsDynamic)
2660 log->writeLine("(the following are initial values, they could change due to box deformation)");
2662 limit = dd->comm->cellsize_min[XX];
2663 for (d = 1; d < DIM; d++)
2665 limit = std::min(limit, dd->comm->cellsize_min[d]);
2669 if (comm->systemInfo.haveInterDomainBondeds)
2671 log->writeLineFormatted("%40s %-7s %6.3f nm",
2672 "two-body bonded interactions", "(-rdd)",
2673 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2674 log->writeLineFormatted("%40s %-7s %6.3f nm",
2675 "multi-body bonded interactions", "(-rdd)",
2676 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2678 if (haveInterDomainVsites)
2680 log->writeLineFormatted("%40s %-7s %6.3f nm",
2681 "virtual site constructions", "(-rcon)", limit);
2683 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2685 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2686 1+ir->nProjOrder);
2687 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2688 separation.c_str(), "(-rcon)", limit);
2690 log->ensureLineBreak();
2694 static void logSettings(const gmx::MDLogger &mdlog,
2695 gmx_domdec_t *dd,
2696 const gmx_mtop_t *mtop,
2697 const t_inputrec *ir,
2698 real dlb_scale,
2699 const gmx_ddbox_t *ddbox)
2701 gmx::StringOutputStream stream;
2702 gmx::TextWriter log(&stream);
2703 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2704 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2707 log.ensureEmptyLine();
2708 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2710 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2712 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2715 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2716 gmx_domdec_t *dd,
2717 real dlb_scale,
2718 const t_inputrec *ir,
2719 const gmx_ddbox_t *ddbox)
2721 gmx_domdec_comm_t *comm;
2722 int d, dim, npulse, npulse_d_max, npulse_d;
2723 gmx_bool bNoCutOff;
2725 comm = dd->comm;
2727 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2729 /* Determine the maximum number of comm. pulses in one dimension */
2731 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2733 /* Determine the maximum required number of grid pulses */
2734 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2736 /* Only a single pulse is required */
2737 npulse = 1;
2739 else if (!bNoCutOff && comm->cellsize_limit > 0)
2741 /* We round down slightly here to avoid overhead due to the latency
2742 * of extra communication calls when the cut-off
2743 * would be only slightly longer than the cell size.
2744 * Later cellsize_limit is redetermined,
2745 * so we can not miss interactions due to this rounding.
2747 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2749 else
2751 /* There is no cell size limit */
2752 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2755 if (!bNoCutOff && npulse > 1)
2757 /* See if we can do with less pulses, based on dlb_scale */
2758 npulse_d_max = 0;
2759 for (d = 0; d < dd->ndim; d++)
2761 dim = dd->dim[d];
2762 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2763 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2764 npulse_d_max = std::max(npulse_d_max, npulse_d);
2766 npulse = std::min(npulse, npulse_d_max);
2769 /* This env var can override npulse */
2770 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2771 if (d > 0)
2773 npulse = d;
2776 comm->maxpulse = 1;
2777 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2778 for (d = 0; d < dd->ndim; d++)
2780 if (comm->ddSettings.request1DAnd1Pulse)
2782 comm->cd[d].np_dlb = 1;
2784 else
2786 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2787 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2789 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2791 comm->bVacDLBNoLimit = FALSE;
2795 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2796 if (!comm->bVacDLBNoLimit)
2798 comm->cellsize_limit = std::max(comm->cellsize_limit,
2799 comm->systemInfo.cutoff/comm->maxpulse);
2801 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2802 /* Set the minimum cell size for each DD dimension */
2803 for (d = 0; d < dd->ndim; d++)
2805 if (comm->bVacDLBNoLimit ||
2806 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2808 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2810 else
2812 comm->cellsize_min_dlb[dd->dim[d]] =
2813 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2816 if (comm->cutoff_mbody <= 0)
2818 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2820 if (isDlbOn(comm))
2822 set_dlb_limits(dd);
2826 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2828 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2831 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2833 /* If each molecule is a single charge group
2834 * or we use domain decomposition for each periodic dimension,
2835 * we do not need to take pbc into account for the bonded interactions.
2837 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2838 !(dd->nc[XX] > 1 &&
2839 dd->nc[YY] > 1 &&
2840 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2843 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2844 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2845 gmx_domdec_t *dd, real dlb_scale,
2846 const gmx_mtop_t *mtop, const t_inputrec *ir,
2847 const gmx_ddbox_t *ddbox)
2849 gmx_domdec_comm_t *comm = dd->comm;
2850 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2852 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2854 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2855 if (ddRankSetup.npmedecompdim >= 2)
2857 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2860 else
2862 ddRankSetup.numRanksDoingPme = 0;
2863 if (dd->pme_nodeid >= 0)
2865 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2866 "Can not have separate PME ranks without PME electrostatics");
2870 if (debug)
2872 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2874 if (!isDlbDisabled(comm))
2876 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2879 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2881 real vol_frac;
2882 if (ir->ePBC == epbcNONE)
2884 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2886 else
2888 vol_frac =
2889 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2891 if (debug)
2893 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2895 int natoms_tot = mtop->natoms;
2897 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2898 static_cast<int>(vol_frac*natoms_tot));
2901 /*! \brief Get some important DD parameters which can be modified by env.vars */
2902 static DDSettings
2903 getDDSettings(const gmx::MDLogger &mdlog,
2904 const DomdecOptions &options,
2905 const gmx::MdrunOptions &mdrunOptions,
2906 const t_inputrec &ir)
2908 DDSettings ddSettings;
2910 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2911 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2912 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2913 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2914 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2915 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2916 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2917 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2918 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2919 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2921 if (ddSettings.useSendRecv2)
2923 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2926 if (ddSettings.eFlop)
2928 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2929 ddSettings.recordLoad = true;
2931 else
2933 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2936 ddSettings.initialDlbState =
2937 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2938 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2939 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2941 return ddSettings;
2944 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2945 unitCellInfo(ir)
2949 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2951 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2952 * generally requires a larger box than other possible decompositions
2953 * with the same rank count, so the calling code might need to decide
2954 * what is the most appropriate way to run the simulation based on
2955 * whether such a DD is possible.
2957 * This function works like init_domain_decomposition(), but will not
2958 * give a fatal error, and only uses \c cr for communicating between
2959 * ranks.
2961 * It is safe to call before thread-MPI spawns ranks, so that
2962 * thread-MPI can decide whether and how to trigger the GPU halo
2963 * exchange code path. The number of PME ranks, if any, should be set
2964 * in \c options.numPmeRanks.
2966 static bool
2967 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2968 const t_commrec *cr,
2969 const int numRanksRequested,
2970 const DomdecOptions &options,
2971 const gmx_mtop_t &mtop,
2972 const t_inputrec &ir,
2973 const matrix box,
2974 gmx::ArrayRef<const gmx::RVec> xGlobal)
2976 // Ensure we don't write any output from this checking routine
2977 gmx::MDLogger dummyLogger;
2979 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2981 DDSettings ddSettings = ddSettingsOriginal;
2982 ddSettings.request1DAnd1Pulse = true;
2983 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2984 !isDlbDisabled(ddSettings.initialDlbState),
2985 options.dlbScaling, ir,
2986 systemInfo.cellsizeLimit);
2987 gmx_ddbox_t ddbox = {0};
2988 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2989 ddSettings, systemInfo, gridSetupCellsizeLimit,
2990 mtop, ir, box, xGlobal, &ddbox);
2992 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2994 return canMakeDDWith1DAnd1Pulse;
2997 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2999 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
3000 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
3001 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
3003 const bool hasMax1Pulse =
3004 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3005 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3007 return decompositionHasOneDimension && hasMax1Pulse;
3011 namespace gmx
3014 // TODO once the functionality stablizes, move this class and
3015 // supporting functionality into builder.cpp
3016 /*! \brief Impl class for DD builder */
3017 class DomainDecompositionBuilder::Impl
3019 public:
3020 //! Constructor
3021 Impl(const MDLogger &mdlog,
3022 t_commrec *cr,
3023 const DomdecOptions &options,
3024 const MdrunOptions &mdrunOptions,
3025 bool prefer1DAnd1Pulse,
3026 const gmx_mtop_t &mtop,
3027 const t_inputrec &ir,
3028 const matrix box,
3029 ArrayRef<const RVec> xGlobal);
3031 //! Build the resulting DD manager
3032 gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3034 //! Objects used in constructing and configuring DD
3035 //! {
3036 //! Logging object
3037 const MDLogger &mdlog_;
3038 //! Communication object
3039 t_commrec *cr_;
3040 //! User-supplied options configuring DD behavior
3041 const DomdecOptions options_;
3042 //! Global system topology
3043 const gmx_mtop_t &mtop_;
3044 //! User input values from the tpr file
3045 const t_inputrec &ir_;
3046 //! }
3048 //! Internal objects used in constructing DD
3049 //! {
3050 //! Settings combined from the user input
3051 DDSettings ddSettings_;
3052 //! Information derived from the simulation system
3053 DDSystemInfo systemInfo_;
3054 //! Box structure
3055 gmx_ddbox_t ddbox_ = { 0 };
3056 //! Organization of the DD grids
3057 DDGridSetup ddGridSetup_;
3058 //! Organzation of the DD ranks
3059 DDRankSetup ddRankSetup_;
3060 //! Number of DD cells in each dimension
3061 ivec ddCellIndex_ = { 0, 0, 0 };
3062 //! IDs of PME-only ranks
3063 std::vector<int> pmeRanks_;
3064 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3065 CartesianRankSetup cartSetup_;
3066 //! }
3070 DomainDecompositionBuilder::Impl::Impl(const MDLogger &mdlog,
3071 t_commrec *cr,
3072 const DomdecOptions &options,
3073 const MdrunOptions &mdrunOptions,
3074 const bool prefer1DAnd1Pulse,
3075 const gmx_mtop_t &mtop,
3076 const t_inputrec &ir,
3077 const matrix box,
3078 ArrayRef<const RVec> xGlobal)
3079 : mdlog_(mdlog),
3080 cr_(cr),
3081 options_(options),
3082 mtop_(mtop),
3083 ir_(ir)
3085 GMX_LOG(mdlog_.info).appendTextFormatted(
3086 "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3088 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3090 if (prefer1DAnd1Pulse &&
3091 canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3092 mtop_, ir_, box, xGlobal))
3094 ddSettings_.request1DAnd1Pulse = true;
3097 if (ddSettings_.eFlop > 1)
3099 /* Ensure that we have different random flop counts on different ranks */
3100 srand(1 + cr_->nodeid);
3103 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3105 const int numRanksRequested = cr_->nnodes;
3106 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3108 // DD grid setup uses a more different cell size limit for
3109 // automated setup than the one in systemInfo_. The latter is used
3110 // in set_dd_limits() to configure DLB, for example.
3111 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3112 !isDlbDisabled(ddSettings_.initialDlbState),
3113 options_.dlbScaling, ir_,
3114 systemInfo_.cellsizeLimit);
3115 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3116 ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3117 mtop_, ir_, box, xGlobal, &ddbox_);
3118 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3120 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3122 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3124 /* Generate the group communicator, also decides the duty of each rank */
3125 cartSetup_ =
3126 makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3127 ddRankSetup_, cr_,
3128 ddCellIndex_, &pmeRanks_);
3131 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3133 gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3135 copy_ivec(ddCellIndex_, dd->ci);
3137 dd->comm = init_dd_comm();
3139 dd->comm->ddRankSetup = ddRankSetup_;
3140 dd->comm->cartesianRankSetup = cartSetup_;
3142 set_dd_limits(mdlog_, cr_, dd, options_,
3143 ddSettings_, systemInfo_,
3144 ddGridSetup_,
3145 ddRankSetup_.numPPRanks,
3146 &mtop_, &ir_,
3147 ddbox_);
3149 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3151 if (thisRankHasDuty(cr_, DUTY_PP))
3153 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3155 setup_neighbor_relations(dd);
3158 /* Set overallocation to avoid frequent reallocation of arrays */
3159 set_over_alloc_dd(TRUE);
3161 dd->atomSets = atomSets;
3163 return dd;
3166 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger &mdlog,
3167 t_commrec *cr,
3168 const DomdecOptions &options,
3169 const MdrunOptions &mdrunOptions,
3170 const bool prefer1DAnd1Pulse,
3171 const gmx_mtop_t &mtop,
3172 const t_inputrec &ir,
3173 const matrix box,
3174 ArrayRef<const RVec> xGlobal)
3175 : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3179 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3181 return impl_->build(atomSets);
3184 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3186 } // namespace gmx
3188 static gmx_bool test_dd_cutoff(t_commrec *cr,
3189 const matrix box,
3190 gmx::ArrayRef<const gmx::RVec> x,
3191 real cutoffRequested)
3193 gmx_domdec_t *dd;
3194 gmx_ddbox_t ddbox;
3195 int d, dim, np;
3196 real inv_cell_size;
3197 int LocallyLimited;
3199 dd = cr->dd;
3201 set_ddbox(*dd, false, box, true, x, &ddbox);
3203 LocallyLimited = 0;
3205 for (d = 0; d < dd->ndim; d++)
3207 dim = dd->dim[d];
3209 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3210 if (dd->unitCellInfo.ddBoxIsDynamic)
3212 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3215 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3217 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3219 return FALSE;
3222 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3224 if (np > dd->comm->cd[d].np_dlb)
3226 return FALSE;
3229 /* If a current local cell size is smaller than the requested
3230 * cut-off, we could still fix it, but this gets very complicated.
3231 * Without fixing here, we might actually need more checks.
3233 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3234 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3236 LocallyLimited = 1;
3241 if (!isDlbDisabled(dd->comm))
3243 /* If DLB is not active yet, we don't need to check the grid jumps.
3244 * Actually we shouldn't, because then the grid jump data is not set.
3246 if (isDlbOn(dd->comm) &&
3247 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3249 LocallyLimited = 1;
3252 gmx_sumi(1, &LocallyLimited, cr);
3254 if (LocallyLimited > 0)
3256 return FALSE;
3260 return TRUE;
3263 gmx_bool change_dd_cutoff(t_commrec *cr,
3264 const matrix box,
3265 gmx::ArrayRef<const gmx::RVec> x,
3266 real cutoffRequested)
3268 gmx_bool bCutoffAllowed;
3270 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3272 if (bCutoffAllowed)
3274 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3277 return bCutoffAllowed;