Use gmx::Range in domdec
[gromacs.git] / src / gromacs / domdec / domdec.cpp
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include "domdec.h"
40 #include "config.h"
42 #include <cassert>
43 #include <cinttypes>
44 #include <climits>
45 #include <cmath>
46 #include <cstdio>
47 #include <cstdlib>
48 #include <cstring>
50 #include <algorithm>
51 #include <memory>
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
103 #include "atomdistribution.h"
104 #include "box.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
112 #include "utility.h"
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
122 #define DD_CGIBS 2
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_FLAG_NRCG 65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
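/* Editor's illustration (not part of the original source): with these macros a
 * move-flag word packs the atom-group size in the low 16 bits (masked by
 * DD_FLAG_NRCG) and per-dimension direction bits above them. A hypothetical
 * flag for a 3-atom group moving forward along decomposition dimension 1
 * could be built as:
 *
 *   int flag = (3 & DD_FLAG_NRCG) | DD_FLAG_FW(1);   // 3 | (1 << 18)
 */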
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
133 /* The non-bonded zone-pair setup for domain decomposition
134 * The first number is the i-zone, the second number the first j-zone seen by
135 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136 * As is, this is for 3D decomposition, where there are 4 i-zones.
137 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
140 static const int
141 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
142 {1, 3, 6},
143 {2, 5, 6},
144 {3, 5, 7}};
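/* Worked example (editor's illustration, not in the original source): the row
 * {1, 3, 6} means i-zone 1 sees j-zones 3, 4 and 5 in a 3D decomposition.
 * With a 2D decomposition the last+1 j-zone is clipped to 4, so i-zone 1 then
 * only sees j-zone 3.
 */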
148 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
150 static void index2xyz(ivec nc,int ind,ivec xyz)
152 xyz[XX] = ind % nc[XX];
153 xyz[YY] = (ind / nc[XX]) % nc[YY];
154 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162 xyz[ZZ] = ind % nc[ZZ];
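/* Editor's note (illustrative, not in the original source): index2xyz()
 * inverts the dd_index() macro above, with x varying fastest, while
 * ddindex2xyz() decodes an ordering in which z varies fastest. For example,
 * with nc = {2, 2, 2}, index2xyz(nc, 1, xyz) gives {1, 0, 0}, whereas
 * ddindex2xyz(nc, 1, xyz) gives {0, 0, 1}.
 */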
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
167 int ddnodeid = -1;
169 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170 const int ddindex = dd_index(dd->nc, c);
171 if (cartSetup.bCartesianPP_PME)
173 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175 else if (cartSetup.bCartesianPP)
177 #if GMX_MPI
178 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
179 #endif
181 else
183 ddnodeid = ddindex;
186 return ddnodeid;
189 int ddglatnr(const gmx_domdec_t *dd, int i)
191 int atnr;
193 if (dd == nullptr)
195 atnr = i + 1;
197 else
199 if (i >= dd->comm->atomRanges.numAtomsTotal())
201 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
203 atnr = dd->globalAtomIndices[i] + 1;
206 return atnr;
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
211 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
217 int i;
219 if (state->ddp_count != dd->ddp_count)
221 gmx_incons("The MD state does not match the domain decomposition state");
224 state->cg_gl.resize(dd->ncg_home);
225 for (i = 0; i < dd->ncg_home; i++)
227 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
230 state->ddp_count_cg_gl = dd->ddp_count;
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
235 return &dd->comm->zones;
238 int dd_numAtomsZones(const gmx_domdec_t &dd)
240 return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
243 int dd_numHomeAtoms(const gmx_domdec_t &dd)
245 return dd.comm->atomRanges.numHomeAtoms();
248 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
250 /* We currently set mdatoms entries for all atoms:
251 * local + non-local + communicated for vsite + constraints
254 return dd->comm->atomRanges.numAtomsTotal();
257 int dd_natoms_vsite(const gmx_domdec_t *dd)
259 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
262 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
264 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
265 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
268 void dd_move_x(gmx_domdec_t *dd,
269 const matrix box,
270 gmx::ArrayRef<gmx::RVec> x,
271 gmx_wallcycle *wcycle)
273 wallcycle_start(wcycle, ewcMOVEX);
275 int nzone, nat_tot;
276 gmx_domdec_comm_t *comm;
277 gmx_domdec_comm_dim_t *cd;
278 rvec shift = {0, 0, 0};
279 gmx_bool bPBC, bScrew;
281 comm = dd->comm;
283 nzone = 1;
284 nat_tot = comm->atomRanges.numHomeAtoms();
285 for (int d = 0; d < dd->ndim; d++)
287 bPBC = (dd->ci[dd->dim[d]] == 0);
288 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
289 if (bPBC)
291 copy_rvec(box[dd->dim[d]], shift);
293 cd = &comm->cd[d];
294 for (const gmx_domdec_ind_t &ind : cd->ind)
296 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
297 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
298 int n = 0;
299 if (!bPBC)
301 for (int j : ind.index)
303 sendBuffer[n] = x[j];
304 n++;
307 else if (!bScrew)
309 for (int j : ind.index)
311 /* We need to shift the coordinates */
312 for (int d = 0; d < DIM; d++)
314 sendBuffer[n][d] = x[j][d] + shift[d];
316 n++;
319 else
321 for (int j : ind.index)
323 /* Shift x */
324 sendBuffer[n][XX] = x[j][XX] + shift[XX];
325 /* Rotate y and z.
326 * This operation requires a special shift force
327 * treatment, which is performed in calc_vir.
329 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
330 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
331 n++;
335 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
337 gmx::ArrayRef<gmx::RVec> receiveBuffer;
338 if (cd->receiveInPlace)
340 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
342 else
344 receiveBuffer = receiveBufferAccess.buffer;
346 /* Send and receive the coordinates */
347 ddSendrecv(dd, d, dddirBackward,
348 sendBuffer, receiveBuffer);
350 if (!cd->receiveInPlace)
352 int j = 0;
353 for (int zone = 0; zone < nzone; zone++)
355 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
357 x[i] = receiveBuffer[j++];
361 nat_tot += ind.nrecv[nzone+1];
363 nzone += nzone;
366 wallcycle_stop(wcycle, ewcMOVEX);
369 void dd_move_f(gmx_domdec_t *dd,
370 gmx::ForceWithShiftForces *forceWithShiftForces,
371 gmx_wallcycle *wcycle)
373 wallcycle_start(wcycle, ewcMOVEF);
375 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
376 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
378 gmx_domdec_comm_t &comm = *dd->comm;
379 int nzone = comm.zones.n/2;
380 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
381 for (int d = dd->ndim-1; d >= 0; d--)
383 /* Only forces in domains near the PBC boundaries need to
384 consider PBC in the treatment of fshift */
385 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
386 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
387 /* Determine which shift vector we need */
388 ivec vis = { 0, 0, 0 };
389 vis[dd->dim[d]] = 1;
390 const int is = IVEC2IS(vis);
392 /* Loop over the pulses */
393 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
394 for (int p = cd.numPulses() - 1; p >= 0; p--)
396 const gmx_domdec_ind_t &ind = cd.ind[p];
397 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
398 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
400 nat_tot -= ind.nrecv[nzone+1];
402 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
404 gmx::ArrayRef<gmx::RVec> sendBuffer;
405 if (cd.receiveInPlace)
407 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
409 else
411 sendBuffer = sendBufferAccess.buffer;
412 int j = 0;
413 for (int zone = 0; zone < nzone; zone++)
415 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
417 sendBuffer[j++] = f[i];
421 /* Communicate the forces */
422 ddSendrecv(dd, d, dddirForward,
423 sendBuffer, receiveBuffer);
424 /* Add the received forces */
425 int n = 0;
426 if (!shiftForcesNeedPbc)
428 for (int j : ind.index)
430 for (int d = 0; d < DIM; d++)
432 f[j][d] += receiveBuffer[n][d];
434 n++;
437 else if (!applyScrewPbc)
439 for (int j : ind.index)
441 for (int d = 0; d < DIM; d++)
443 f[j][d] += receiveBuffer[n][d];
445 /* Add this force to the shift force */
446 for (int d = 0; d < DIM; d++)
448 fshift[is][d] += receiveBuffer[n][d];
450 n++;
453 else
455 for (int j : ind.index)
457 /* Rotate the force */
458 f[j][XX] += receiveBuffer[n][XX];
459 f[j][YY] -= receiveBuffer[n][YY];
460 f[j][ZZ] -= receiveBuffer[n][ZZ];
461 if (shiftForcesNeedPbc)
463 /* Add this force to the shift force */
464 for (int d = 0; d < DIM; d++)
466 fshift[is][d] += receiveBuffer[n][d];
469 n++;
473 nzone /= 2;
475 wallcycle_stop(wcycle, ewcMOVEF);
478 /* Convenience function for extracting a real buffer from an rvec buffer
480 * To reduce the number of temporary communication buffers and avoid
481 * cache pollution, we reuse gmx::RVec buffers for storing reals.
482 * This function returns a real buffer reference with the same number
483 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
485 static gmx::ArrayRef<real>
486 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
488 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
489 arrayRef.size());
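/* Usage sketch (editor's addition, not in the original source): for a buffer
 * of N RVec elements the returned ArrayRef<real> also has N elements, i.e. it
 * aliases only the first third of the underlying reals, which matches the
 * over-provisioning noted in the callers below.
 */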
492 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
494 int nzone, nat_tot;
495 gmx_domdec_comm_t *comm;
496 gmx_domdec_comm_dim_t *cd;
498 comm = dd->comm;
500 nzone = 1;
501 nat_tot = comm->atomRanges.numHomeAtoms();
502 for (int d = 0; d < dd->ndim; d++)
504 cd = &comm->cd[d];
505 for (const gmx_domdec_ind_t &ind : cd->ind)
507 /* Note: We provision for RVec instead of real, so a factor of 3
508 * more than needed. The buffer actually already has this size
509 * and we pass a plain pointer below, so this does not matter.
511 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
512 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
513 int n = 0;
514 for (int j : ind.index)
516 sendBuffer[n++] = v[j];
519 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
521 gmx::ArrayRef<real> receiveBuffer;
522 if (cd->receiveInPlace)
524 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
526 else
528 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
530 /* Send and receive the data */
531 ddSendrecv(dd, d, dddirBackward,
532 sendBuffer, receiveBuffer);
533 if (!cd->receiveInPlace)
535 int j = 0;
536 for (int zone = 0; zone < nzone; zone++)
538 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
540 v[i] = receiveBuffer[j++];
544 nat_tot += ind.nrecv[nzone+1];
546 nzone += nzone;
550 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
552 int nzone, nat_tot;
553 gmx_domdec_comm_t *comm;
554 gmx_domdec_comm_dim_t *cd;
556 comm = dd->comm;
558 nzone = comm->zones.n/2;
559 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
560 for (int d = dd->ndim-1; d >= 0; d--)
562 cd = &comm->cd[d];
563 for (int p = cd->numPulses() - 1; p >= 0; p--)
565 const gmx_domdec_ind_t &ind = cd->ind[p];
567 /* Note: We provision for RVec instead of real, so a factor of 3
568 * more than needed. The buffer actually already has this size
569 * and we typecast, so this works as intended.
571 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
572 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
573 nat_tot -= ind.nrecv[nzone + 1];
575 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
577 gmx::ArrayRef<real> sendBuffer;
578 if (cd->receiveInPlace)
580 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
582 else
584 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
585 int j = 0;
586 for (int zone = 0; zone < nzone; zone++)
588 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
590 sendBuffer[j++] = v[i];
594 /* Communicate the forces */
595 ddSendrecv(dd, d, dddirForward,
596 sendBuffer, receiveBuffer);
597 /* Add the received forces */
598 int n = 0;
599 for (int j : ind.index)
601 v[j] += receiveBuffer[n];
602 n++;
605 nzone /= 2;
609 real dd_cutoff_multibody(const gmx_domdec_t *dd)
611 const gmx_domdec_comm_t &comm = *dd->comm;
612 const DDSystemInfo &systemInfo = comm.systemInfo;
614 real r = -1;
615 if (systemInfo.haveInterDomainMultiBodyBondeds)
617 if (comm.cutoff_mbody > 0)
619 r = comm.cutoff_mbody;
621 else
623 /* cutoff_mbody=0 means we do not have DLB */
624 r = comm.cellsize_min[dd->dim[0]];
625 for (int di = 1; di < dd->ndim; di++)
627 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
629 if (comm.systemInfo.filterBondedCommunication)
631 r = std::max(r, comm.cutoff_mbody);
633 else
635 r = std::min(r, systemInfo.cutoff);
640 return r;
643 real dd_cutoff_twobody(const gmx_domdec_t *dd)
645 real r_mb;
647 r_mb = dd_cutoff_multibody(dd);
649 return std::max(dd->comm->systemInfo.cutoff, r_mb);
653 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
654 const CartesianRankSetup &cartSetup,
655 const ivec coord,
656 ivec coord_pme)
658 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
659 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
660 copy_ivec(coord, coord_pme);
661 coord_pme[cartSetup.cartpmedim] =
662 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
665 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
666 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
667 const int ddCellIndex)
669 const int npp = ddRankSetup.numPPRanks;
670 const int npme = ddRankSetup.numRanksDoingPme;
672 /* Here we assign a PME node to communicate with this DD node
673 * by assuming that the major index of both is x.
674 * We add npme/2 to obtain an even distribution.
676 return (ddCellIndex*npme + npme/2)/npp;
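/* Worked example (editor's addition, not in the original source): with
 * npp = 6 PP ranks and npme = 2 PME ranks, (ddCellIndex*npme + npme/2)/npp
 * maps DD cell indices 0, 1, 2 to PME index 0 and 3, 4, 5 to PME index 1.
 */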
679 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
681 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
683 int n = 0;
684 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
686 const int p0 = ddindex2pmeindex(ddRankSetup, i);
687 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
688 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
690 if (debug)
692 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
694 pmeRanks[n] = i + 1 + n;
695 n++;
699 return pmeRanks;
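/* Editor's illustration (not in the original source), continuing the example
 * above: with 6 PP ranks and 2 PME ranks the interleaved list becomes
 * pmeRanks = {3, 7}, i.e. simulation ranks 0-2 and 4-6 do PP work while ranks
 * 3 and 7 are the PME-only ranks.
 */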
702 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
704 gmx_domdec_t *dd;
705 ivec coords;
706 int slab;
708 dd = cr->dd;
709 /*
710 if (dd->comm->bCartesian) {
711 gmx_ddindex2xyz(dd->nc,ddindex,coords);
712 dd_coords2pmecoords(dd,coords,coords_pme);
713 copy_ivec(dd->ntot,nc);
714 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
715 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
717 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
718 } else {
719 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
720 }
721 */
722 coords[XX] = x;
723 coords[YY] = y;
724 coords[ZZ] = z;
725 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
727 return slab;
730 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
732 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
733 ivec coords = { x, y, z };
734 int nodeid = -1;
736 if (cartSetup.bCartesianPP_PME)
738 #if GMX_MPI
739 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
740 #endif
742 else
744 int ddindex = dd_index(cr->dd->nc, coords);
745 if (cartSetup.bCartesianPP)
747 nodeid = cartSetup.ddindex2simnodeid[ddindex];
749 else
751 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
753 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
755 else
757 nodeid = ddindex;
762 return nodeid;
765 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
766 const CartesianRankSetup &cartSetup,
767 gmx::ArrayRef<const int> pmeRanks,
768 const t_commrec gmx_unused *cr,
769 const int sim_nodeid)
771 int pmenode = -1;
773 /* This assumes a uniform x domain decomposition grid cell size */
774 if (cartSetup.bCartesianPP_PME)
776 #if GMX_MPI
777 ivec coord, coord_pme;
778 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
779 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
781 /* This is a PP rank */
782 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
783 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
785 #endif
787 else if (cartSetup.bCartesianPP)
789 if (sim_nodeid < ddRankSetup.numPPRanks)
791 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
794 else
796 /* This assumes DD cells with identical x coordinates
797 * are numbered sequentially.
799 if (pmeRanks.empty())
801 if (sim_nodeid < ddRankSetup.numPPRanks)
803 /* The DD index equals the nodeid */
804 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
807 else
809 int i = 0;
810 while (sim_nodeid > pmeRanks[i])
812 i++;
814 if (sim_nodeid < pmeRanks[i])
816 pmenode = pmeRanks[i];
821 return pmenode;
824 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
826 if (dd != nullptr)
828 return {
829 dd->comm->ddRankSetup.npmenodes_x,
830 dd->comm->ddRankSetup.npmenodes_y
833 else
835 return {
836 1, 1
841 std::vector<int> get_pme_ddranks(const t_commrec *cr,
842 const int pmenodeid)
844 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
845 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
846 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
847 std::vector<int> ddranks;
848 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
850 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
852 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
854 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
856 if (cartSetup.bCartesianPP_PME)
858 ivec coord = { x, y, z };
859 ivec coord_pme;
860 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
861 if (cr->dd->ci[XX] == coord_pme[XX] &&
862 cr->dd->ci[YY] == coord_pme[YY] &&
863 cr->dd->ci[ZZ] == coord_pme[ZZ])
865 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
868 else
870 /* The slab corresponds to the nodeid in the PME group */
871 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
873 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
879 return ddranks;
882 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
883 gmx::ArrayRef<const int> pmeRanks,
884 const t_commrec *cr)
886 gmx_bool bReceive = TRUE;
888 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
889 if (ddRankSetup.usePmeOnlyRanks)
891 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
892 if (cartSetup.bCartesianPP_PME)
894 #if GMX_MPI
895 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
896 ivec coords;
897 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
898 coords[cartSetup.cartpmedim]++;
899 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
901 int rank;
902 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
903 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
905 /* This is not the last PP node for pmenode */
906 bReceive = FALSE;
909 #else
910 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
911 #endif
913 else
915 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
916 if (cr->sim_nodeid+1 < cr->nnodes &&
917 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
919 /* This is not the last PP node for pmenode */
920 bReceive = FALSE;
925 return bReceive;
928 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
930 gmx_domdec_comm_t *comm;
931 int i;
933 comm = dd->comm;
935 snew(*dim_f, dd->nc[dim]+1);
936 (*dim_f)[0] = 0;
937 for (i = 1; i < dd->nc[dim]; i++)
939 if (comm->slb_frac[dim])
941 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
943 else
945 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
948 (*dim_f)[dd->nc[dim]] = 1;
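/* Worked example (editor's addition, not in the original source): without a
 * user-supplied slb_frac and dd->nc[dim] == 4, this yields the cumulative
 * fractions dim_f = {0, 0.25, 0.5, 0.75, 1}.
 */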
951 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
953 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
955 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
957 ddpme->dim = YY;
959 else
961 ddpme->dim = dimind;
963 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
965 ddpme->nslab = (ddpme->dim == 0 ?
966 ddRankSetup.npmenodes_x :
967 ddRankSetup.npmenodes_y);
969 if (ddpme->nslab <= 1)
971 return;
974 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
975 /* Determine for each PME slab the PP location range for dimension dim */
976 snew(ddpme->pp_min, ddpme->nslab);
977 snew(ddpme->pp_max, ddpme->nslab);
978 for (int slab = 0; slab < ddpme->nslab; slab++)
980 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
981 ddpme->pp_max[slab] = 0;
983 for (int i = 0; i < dd->nnodes; i++)
985 ivec xyz;
986 ddindex2xyz(dd->nc, i, xyz);
987 /* For y only use our y/z slab.
988 * This assumes that the PME x grid size matches the DD grid size.
990 if (dimind == 0 || xyz[XX] == dd->ci[XX])
992 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
993 int slab;
994 if (dimind == 0)
996 slab = pmeindex/nso;
998 else
1000 slab = pmeindex % ddpme->nslab;
1002 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1003 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1007 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1010 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1012 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1014 if (ddRankSetup.ddpme[0].dim == XX)
1016 return ddRankSetup.ddpme[0].maxshift;
1018 else
1020 return 0;
1024 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1026 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1028 if (ddRankSetup.ddpme[0].dim == YY)
1030 return ddRankSetup.ddpme[0].maxshift;
1032 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1034 return ddRankSetup.ddpme[1].maxshift;
1036 else
1038 return 0;
1042 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1044 return dd.comm->systemInfo.haveSplitConstraints;
1047 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1049 /* Note that the cycles value can be incorrect, either 0 or some
1050 * extremely large value, when our thread migrated to another core
1051 * with an unsynchronized cycle counter. If this happens less often
1052 * than once per nstlist steps, this will not cause issues, since
1053 * we later subtract the maximum value from the sum over nstlist steps.
1054 * A zero count will slightly lower the total, but that's a small effect.
1055 * Note that the main purpose of the subtraction of the maximum value
1056 * is to avoid throwing off the load balancing when stalls occur due
1057 * e.g. system activity or network congestion.
1059 dd->comm->cycl[ddCycl] += cycles;
1060 dd->comm->cycl_n[ddCycl]++;
1061 if (cycles > dd->comm->cycl_max[ddCycl])
1063 dd->comm->cycl_max[ddCycl] = cycles;
1067 #if GMX_MPI
1068 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1070 MPI_Comm c_row;
1071 int dim, i, rank;
1072 ivec loc_c;
1073 gmx_bool bPartOfGroup = FALSE;
1075 dim = dd->dim[dim_ind];
1076 copy_ivec(loc, loc_c);
1077 for (i = 0; i < dd->nc[dim]; i++)
1079 loc_c[dim] = i;
1080 rank = dd_index(dd->nc, loc_c);
1081 if (rank == dd->rank)
1083 /* This process is part of the group */
1084 bPartOfGroup = TRUE;
1087 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1088 &c_row);
1089 if (bPartOfGroup)
1091 dd->comm->mpi_comm_load[dim_ind] = c_row;
1092 if (!isDlbDisabled(dd->comm))
1094 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1096 if (dd->ci[dim] == dd->master_ci[dim])
1098 /* This is the root process of this row */
1099 cellsizes.rowMaster = std::make_unique<RowMaster>();
1101 RowMaster &rowMaster = *cellsizes.rowMaster;
1102 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1103 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1104 rowMaster.isCellMin.resize(dd->nc[dim]);
1105 if (dim_ind > 0)
1107 rowMaster.bounds.resize(dd->nc[dim]);
1109 rowMaster.buf_ncd.resize(dd->nc[dim]);
1111 else
1113 /* This is not a root process, we only need to receive cell_f */
1114 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1117 if (dd->ci[dim] == dd->master_ci[dim])
1119 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1123 #endif
1125 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1126 int gpu_id)
1128 #if GMX_MPI
1129 int physicalnode_id_hash;
1130 gmx_domdec_t *dd;
1131 MPI_Comm mpi_comm_pp_physicalnode;
1133 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1135 /* Only ranks with short-ranged tasks (currently) use GPUs.
1136 * If we don't have GPUs assigned, there are no resources to share.
1138 return;
1141 physicalnode_id_hash = gmx_physicalnode_id_hash();
1143 dd = cr->dd;
1145 if (debug)
1147 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1148 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1149 dd->rank, physicalnode_id_hash, gpu_id);
1151 /* Split the PP communicator over the physical nodes */
1152 /* TODO: See if we should store this (before), as it's also used for
1153 * the nodecomm summation.
1155 // TODO PhysicalNodeCommunicator could be extended/used to handle
1156 // the need for per-node per-group communicators.
1157 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1158 &mpi_comm_pp_physicalnode);
1159 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1160 &dd->comm->mpi_comm_gpu_shared);
1161 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1162 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1164 if (debug)
1166 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1169 /* Note that some ranks could share a GPU, while others don't */
1171 if (dd->comm->nrank_gpu_shared == 1)
1173 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1175 #else
1176 GMX_UNUSED_VALUE(cr);
1177 GMX_UNUSED_VALUE(gpu_id);
1178 #endif
1181 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1183 #if GMX_MPI
1184 int dim0, dim1, i, j;
1185 ivec loc;
1187 if (debug)
1189 fprintf(debug, "Making load communicators\n");
1192 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1193 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1195 if (dd->ndim == 0)
1197 return;
1200 clear_ivec(loc);
1201 make_load_communicator(dd, 0, loc);
1202 if (dd->ndim > 1)
1204 dim0 = dd->dim[0];
1205 for (i = 0; i < dd->nc[dim0]; i++)
1207 loc[dim0] = i;
1208 make_load_communicator(dd, 1, loc);
1211 if (dd->ndim > 2)
1213 dim0 = dd->dim[0];
1214 for (i = 0; i < dd->nc[dim0]; i++)
1216 loc[dim0] = i;
1217 dim1 = dd->dim[1];
1218 for (j = 0; j < dd->nc[dim1]; j++)
1220 loc[dim1] = j;
1221 make_load_communicator(dd, 2, loc);
1226 if (debug)
1228 fprintf(debug, "Finished making load communicators\n");
1230 #endif
1233 /*! \brief Sets up the relation between neighboring domains and zones */
1234 static void setup_neighbor_relations(gmx_domdec_t *dd)
1236 int d, dim, m;
1237 ivec tmp, s;
1238 gmx_domdec_zones_t *zones;
1239 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1241 for (d = 0; d < dd->ndim; d++)
1243 dim = dd->dim[d];
1244 copy_ivec(dd->ci, tmp);
1245 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1246 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1247 copy_ivec(dd->ci, tmp);
1248 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1249 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1250 if (debug)
1252 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1253 dd->rank, dim,
1254 dd->neighbor[d][0],
1255 dd->neighbor[d][1]);
1259 int nzone = (1 << dd->ndim);
1260 int nizone = (1 << std::max(dd->ndim - 1, 0));
1261 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1263 zones = &dd->comm->zones;
1265 for (int i = 0; i < nzone; i++)
1267 m = 0;
1268 clear_ivec(zones->shift[i]);
1269 for (d = 0; d < dd->ndim; d++)
1271 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1275 zones->n = nzone;
1276 for (int i = 0; i < nzone; i++)
1278 for (d = 0; d < DIM; d++)
1280 s[d] = dd->ci[d] - zones->shift[i][d];
1281 if (s[d] < 0)
1283 s[d] += dd->nc[d];
1285 else if (s[d] >= dd->nc[d])
1287 s[d] -= dd->nc[d];
1291 for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1293 GMX_RELEASE_ASSERT(ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1294 "The first element for each ddNonbondedZonePairRanges should match its index");
1296 DDPairInteractionRanges iZone;
1297 iZone.iZoneIndex = iZoneIndex;
1298 /* ddNonbondedZonePairRanges is for 3D decomposition, for fewer dimensions use only
1299 * j-zones up to nzone.
1301 iZone.jZoneRange =
1302 gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1303 std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1304 for (dim = 0; dim < DIM; dim++)
1306 if (dd->nc[dim] == 1)
1308 /* All shifts should be allowed */
1309 iZone.shift0[dim] = -1;
1310 iZone.shift1[dim] = 1;
1312 else
1314 /* Determine the min/max j-zone shift wrt the i-zone */
1315 iZone.shift0[dim] = 1;
1316 iZone.shift1[dim] = -1;
1317 for (int jZone : iZone.jZoneRange)
1319 int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1320 if (shift_diff < iZone.shift0[dim])
1322 iZone.shift0[dim] = shift_diff;
1324 if (shift_diff > iZone.shift1[dim])
1326 iZone.shift1[dim] = shift_diff;
1332 zones->iZones.push_back(iZone);
1335 if (!isDlbDisabled(dd->comm))
1337 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1340 if (dd->comm->ddSettings.recordLoad)
1342 make_load_communicators(dd);
1346 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1347 gmx_domdec_t *dd,
1348 t_commrec gmx_unused *cr,
1349 bool gmx_unused reorder)
1351 #if GMX_MPI
1352 gmx_domdec_comm_t *comm = dd->comm;
1353 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1355 if (cartSetup.bCartesianPP)
1357 /* Set up cartesian communication for the particle-particle part */
1358 GMX_LOG(mdlog.info).appendTextFormatted(
1359 "Will use a Cartesian communicator: %d x %d x %d",
1360 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1362 ivec periods;
1363 for (int i = 0; i < DIM; i++)
1365 periods[i] = TRUE;
1367 MPI_Comm comm_cart;
1368 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1369 &comm_cart);
1370 /* We overwrite the old communicator with the new cartesian one */
1371 cr->mpi_comm_mygroup = comm_cart;
1374 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1375 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1377 if (cartSetup.bCartesianPP_PME)
1379 /* Since we want to use the original cartesian setup for sim,
1380 * and not the one after split, we need to make an index.
1382 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1383 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1384 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1385 /* Get the rank of the DD master,
1386 * above we made sure that the master node is a PP node.
1388 int rank;
1389 if (MASTER(cr))
1391 rank = dd->rank;
1393 else
1395 rank = 0;
1397 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1399 else if (cartSetup.bCartesianPP)
1401 if (!comm->ddRankSetup.usePmeOnlyRanks)
1403 /* The PP communicator is also
1404 * the communicator for this simulation
1406 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1408 cr->nodeid = dd->rank;
1410 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1412 /* We need to make an index to go from the coordinates
1413 * to the nodeid of this simulation.
1415 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1416 std::vector<int> buf(dd->nnodes);
1417 if (thisRankHasDuty(cr, DUTY_PP))
1419 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1421 /* Communicate the ddindex to simulation nodeid index */
1422 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1423 cr->mpi_comm_mysim);
1425 /* Determine the master coordinates and rank.
1426 * The DD master should be the same node as the master of this sim.
1428 for (int i = 0; i < dd->nnodes; i++)
1430 if (cartSetup.ddindex2simnodeid[i] == 0)
1432 ddindex2xyz(dd->nc, i, dd->master_ci);
1433 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1436 if (debug)
1438 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1441 else
1443 /* No Cartesian communicators */
1444 /* We use the rank in dd->comm->all as DD index */
1445 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1446 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1447 dd->masterrank = 0;
1448 clear_ivec(dd->master_ci);
1450 #endif
1452 GMX_LOG(mdlog.info).appendTextFormatted(
1453 "Domain decomposition rank %d, coordinates %d %d %d\n",
1454 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1455 if (debug)
1457 fprintf(debug,
1458 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1459 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1463 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1464 t_commrec *cr)
1466 #if GMX_MPI
1467 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1469 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1471 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1472 std::vector<int> buf(dd->nnodes);
1473 if (thisRankHasDuty(cr, DUTY_PP))
1475 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1477 /* Communicate the ddindex to simulation nodeid index */
1478 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1479 cr->mpi_comm_mysim);
1481 #else
1482 GMX_UNUSED_VALUE(dd);
1483 GMX_UNUSED_VALUE(cr);
1484 #endif
1487 static CartesianRankSetup
1488 split_communicator(const gmx::MDLogger &mdlog,
1489 t_commrec *cr,
1490 const DdRankOrder ddRankOrder,
1491 bool gmx_unused reorder,
1492 const DDRankSetup &ddRankSetup,
1493 ivec ddCellIndex,
1494 std::vector<int> *pmeRanks)
1496 CartesianRankSetup cartSetup;
1498 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1499 cartSetup.bCartesianPP_PME = false;
1501 const ivec &numDDCells = ddRankSetup.numPPCells;
1502 /* Initially we set ntot to the number of PP cells */
1503 copy_ivec(numDDCells, cartSetup.ntot);
1505 if (cartSetup.bCartesianPP)
1507 const int numDDCellsTot = ddRankSetup.numPPRanks;
1508 bool bDiv[DIM];
1509 for (int i = 1; i < DIM; i++)
1511 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1513 if (bDiv[YY] || bDiv[ZZ])
1515 cartSetup.bCartesianPP_PME = TRUE;
1516 /* If we have 2D PME decomposition, which is always in x+y,
1517 * we stack the PME only nodes in z.
1518 * Otherwise we choose the direction that provides the thinnest slab
1519 * of PME only nodes as this will have the least effect
1520 * on the PP communication.
1521 * But for the PME communication the opposite might be better.
1523 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1524 !bDiv[YY] ||
1525 numDDCells[YY] > numDDCells[ZZ]))
1527 cartSetup.cartpmedim = ZZ;
1529 else
1531 cartSetup.cartpmedim = YY;
1533 cartSetup.ntot[cartSetup.cartpmedim]
1534 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1536 else
1538 GMX_LOG(mdlog.info).appendTextFormatted(
1539 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1540 ddRankSetup.numRanksDoingPme,
1541 numDDCells[XX], numDDCells[YY],
1542 numDDCells[XX], numDDCells[ZZ]);
1543 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1547 if (cartSetup.bCartesianPP_PME)
1549 #if GMX_MPI
1550 int rank;
1551 ivec periods;
1553 GMX_LOG(mdlog.info).appendTextFormatted(
1554 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1555 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1557 for (int i = 0; i < DIM; i++)
1559 periods[i] = TRUE;
1561 MPI_Comm comm_cart;
1562 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1563 &comm_cart);
1564 MPI_Comm_rank(comm_cart, &rank);
1565 if (MASTER(cr) && rank != 0)
1567 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1570 /* With this assignment we lose the link to the original communicator
1571 * which will usually be MPI_COMM_WORLD, unless we have multisim.
1573 cr->mpi_comm_mysim = comm_cart;
1574 cr->sim_nodeid = rank;
1576 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1578 GMX_LOG(mdlog.info).appendTextFormatted(
1579 "Cartesian rank %d, coordinates %d %d %d\n",
1580 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1582 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1584 cr->duty = DUTY_PP;
1586 if (!ddRankSetup.usePmeOnlyRanks ||
1587 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1589 cr->duty = DUTY_PME;
1592 /* Split the sim communicator into PP and PME only nodes */
1593 MPI_Comm_split(cr->mpi_comm_mysim,
1594 getThisRankDuties(cr),
1595 dd_index(cartSetup.ntot, ddCellIndex),
1596 &cr->mpi_comm_mygroup);
1597 #else
1598 GMX_UNUSED_VALUE(ddCellIndex);
1599 #endif
1601 else
1603 switch (ddRankOrder)
1605 case DdRankOrder::pp_pme:
1606 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1607 break;
1608 case DdRankOrder::interleave:
1609 /* Interleave the PP-only and PME-only ranks */
1610 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1611 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1612 break;
1613 case DdRankOrder::cartesian:
1614 break;
1615 default:
1616 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1619 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1621 cr->duty = DUTY_PME;
1623 else
1625 cr->duty = DUTY_PP;
1627 #if GMX_MPI
1628 /* Split the sim communicator into PP and PME only nodes */
1629 MPI_Comm_split(cr->mpi_comm_mysim,
1630 getThisRankDuties(cr),
1631 cr->nodeid,
1632 &cr->mpi_comm_mygroup);
1633 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1634 #endif
1637 GMX_LOG(mdlog.info).appendTextFormatted(
1638 "This rank does only %s work.\n",
1639 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1641 return cartSetup;
1644 /*! \brief Makes the PP communicator and the PME communicator, when needed
1646 * Returns the Cartesian rank setup.
1647 * Sets \p cr->mpi_comm_mygroup
1648 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1649 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1651 static CartesianRankSetup
1652 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1653 const DDSettings &ddSettings,
1654 const DdRankOrder ddRankOrder,
1655 const DDRankSetup &ddRankSetup,
1656 t_commrec *cr,
1657 ivec ddCellIndex,
1658 std::vector<int> *pmeRanks)
1660 CartesianRankSetup cartSetup;
1662 if (ddRankSetup.usePmeOnlyRanks)
1664 /* Split the communicator into a PP and PME part */
1665 cartSetup =
1666 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1667 ddRankSetup, ddCellIndex, pmeRanks);
1669 else
1671 /* All nodes do PP and PME */
1672 /* We do not require separate communicators */
1673 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1675 cartSetup.bCartesianPP = false;
1676 cartSetup.bCartesianPP_PME = false;
1679 return cartSetup;
1682 /*! \brief For PP ranks, sets or makes the communicator
1684 * For PME ranks, gets the rank id.
1685 * For PP-only ranks, sets the PME-only rank.
1687 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1688 const DDSettings &ddSettings,
1689 gmx::ArrayRef<const int> pmeRanks,
1690 t_commrec *cr,
1691 const int numAtomsInSystem,
1692 gmx_domdec_t *dd)
1694 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1695 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1697 if (thisRankHasDuty(cr, DUTY_PP))
1699 /* Copy or make a new PP communicator */
1701 /* We (possibly) reordered the nodes in split_communicator,
1702 * so it is no longer required in make_pp_communicator.
1704 const bool useCartesianReorder =
1705 (ddSettings.useCartesianReorder &&
1706 !cartSetup.bCartesianPP_PME);
1708 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1710 else
1712 receive_ddindex2simnodeid(dd, cr);
1715 if (!thisRankHasDuty(cr, DUTY_PME))
1717 /* Set up the communication to our PME node */
1718 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1719 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1720 if (debug)
1722 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1723 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1726 else
1728 dd->pme_nodeid = -1;
1731 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1732 if (MASTER(cr))
1734 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1735 numAtomsInSystem,
1736 numAtomsInSystem);
1740 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1741 const char *dir, int nc, const char *size_string)
1743 real *slb_frac, tot;
1744 int i, n;
1745 double dbl;
1747 slb_frac = nullptr;
1748 if (nc > 1 && size_string != nullptr)
1750 GMX_LOG(mdlog.info).appendTextFormatted(
1751 "Using static load balancing for the %s direction", dir);
1752 snew(slb_frac, nc);
1753 tot = 0;
1754 for (i = 0; i < nc; i++)
1756 dbl = 0;
1757 sscanf(size_string, "%20lf%n", &dbl, &n);
1758 if (dbl == 0)
1760 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1762 slb_frac[i] = dbl;
1763 size_string += n;
1764 tot += slb_frac[i];
1766 /* Normalize */
1767 std::string relativeCellSizes = "Relative cell sizes:";
1768 for (i = 0; i < nc; i++)
1770 slb_frac[i] /= tot;
1771 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1773 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1776 return slb_frac;
1779 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1781 int n = 0;
1782 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1783 int nmol;
1784 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1786 for (auto &ilist : extractILists(*ilists, IF_BOND))
1788 if (NRAL(ilist.functionType) > 2)
1790 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1795 return n;
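/* Editor's note (illustrative, not in the original source): NRAL() > 2 selects
 * interactions involving more than two atoms, so e.g. angles and dihedrals are
 * counted here while two-atom bonds and pairs are not.
 */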
1798 static int dd_getenv(const gmx::MDLogger &mdlog,
1799 const char *env_var, int def)
1801 char *val;
1802 int nst;
1804 nst = def;
1805 val = getenv(env_var);
1806 if (val)
1808 if (sscanf(val, "%20d", &nst) <= 0)
1810 nst = 1;
1812 GMX_LOG(mdlog.info).appendTextFormatted(
1813 "Found env.var. %s = %s, using value %d",
1814 env_var, val, nst);
1817 return nst;
1820 static void check_dd_restrictions(const gmx_domdec_t *dd,
1821 const t_inputrec *ir,
1822 const gmx::MDLogger &mdlog)
1824 if (ir->ePBC == epbcSCREW &&
1825 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1827 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1830 if (ir->nstlist == 0)
1832 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1835 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1837 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1841 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1842 const ivec numDomains)
1844 real r = ddbox.box_size[XX];
1845 for (int d = 0; d < DIM; d++)
1847 if (numDomains[d] > 1)
1849 /* Check using the initial average cell size */
1850 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1854 return r;
1857 /*! \brief Depending on the DLB initial value, returns the DLB switched-off state or issues an error.
1859 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1860 const std::string &reasonStr,
1861 const gmx::MDLogger &mdlog)
1863 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1864 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1866 if (cmdlineDlbState == DlbState::onUser)
1868 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1870 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1872 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1874 return DlbState::offForever;
1877 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1879 * This function parses the parameters of the "-dlb" command line option, setting
1880 * corresponding state values. Then it checks the consistency of the determined
1881 * state with other run parameters and settings. As a result, the initial state
1882 * may be altered or an error may be thrown if incompatibility of options is detected.
1884 * \param [in] mdlog Logger.
1885 * \param [in] dlbOption Enum value for the DLB option.
1886 * \param [in] bRecordLoad True if the load balancer is recording load information.
1887 * \param [in] mdrunOptions Options for mdrun.
1888 * \param [in] ir Pointer to mdrun input parameters.
1889 * \returns DLB initial/startup state.
1891 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1892 DlbOption dlbOption, gmx_bool bRecordLoad,
1893 const gmx::MdrunOptions &mdrunOptions,
1894 const t_inputrec *ir)
1896 DlbState dlbState = DlbState::offCanTurnOn;
1898 switch (dlbOption)
1900 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1901 case DlbOption::no: dlbState = DlbState::offUser; break;
1902 case DlbOption::yes: dlbState = DlbState::onUser; break;
1903 default: gmx_incons("Invalid dlbOption enum value");
1906 /* Reruns don't support DLB: bail or override auto mode */
1907 if (mdrunOptions.rerun)
1909 std::string reasonStr = "it is not supported in reruns.";
1910 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1913 /* Unsupported integrators */
1914 if (!EI_DYNAMICS(ir->eI))
1916 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1917 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1920 /* Without cycle counters we can't time work to balance on */
1921 if (!bRecordLoad)
1923 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1924 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1927 if (mdrunOptions.reproducible)
1929 std::string reasonStr = "you started a reproducible run.";
1930 switch (dlbState)
1932 case DlbState::offUser:
1933 break;
1934 case DlbState::offForever:
1935 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1936 break;
1937 case DlbState::offCanTurnOn:
1938 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1939 case DlbState::onCanTurnOff:
1940 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1941 break;
1942 case DlbState::onUser:
1943 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1944 default:
1945 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1949 return dlbState;
1952 static gmx_domdec_comm_t *init_dd_comm()
1954 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1956 comm->n_load_have = 0;
1957 comm->n_load_collect = 0;
1959 comm->haveTurnedOffDlb = false;
1961 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1963 comm->sum_nat[i] = 0;
1965 comm->ndecomp = 0;
1966 comm->nload = 0;
1967 comm->load_step = 0;
1968 comm->load_sum = 0;
1969 comm->load_max = 0;
1970 clear_ivec(comm->load_lim);
1971 comm->load_mdf = 0;
1972 comm->load_pme = 0;
1974 /* This should be replaced by a unique pointer */
1975 comm->balanceRegion = ddBalanceRegionAllocate();
1977 return comm;
1980 /* Returns whether mtop contains constraints and/or vsites */
1981 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1983 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1984 int nmol;
1985 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1987 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1989 return true;
1993 return false;
1996 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1997 const gmx_mtop_t &mtop,
1998 const t_inputrec &inputrec,
1999 const real cutoffMargin,
2000 DDSystemInfo *systemInfo)
2002 /* When we have constraints and/or vsites, it is beneficial to use
2003 * update groups (when possible) to allow independent update of groups.
2005 if (!systemHasConstraintsOrVsites(mtop))
2007 /* No constraints or vsites, atoms can be updated independently */
2008 return;
2011 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2012 systemInfo->useUpdateGroups =
2013 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2014 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2016 if (systemInfo->useUpdateGroups)
2018 int numUpdateGroups = 0;
2019 for (const auto &molblock : mtop.molblock)
2021 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2024 systemInfo->maxUpdateGroupRadius =
2025 computeMaxUpdateGroupRadius(mtop,
2026 systemInfo->updateGroupingPerMoleculetype,
2027 maxReferenceTemperature(inputrec));
2029 /* To use update groups, the large domain-to-domain cutoff distance
2030 * should be compatible with the box size.
2032 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2034 if (systemInfo->useUpdateGroups)
2036 GMX_LOG(mdlog.info).appendTextFormatted(
2037 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2038 numUpdateGroups,
2039 mtop.natoms/static_cast<double>(numUpdateGroups),
2040 systemInfo->maxUpdateGroupRadius);
2042 else
2044 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2045 systemInfo->updateGroupingPerMoleculetype.clear();
2050 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2051 npbcdim(ePBC2npbcdim(ir.ePBC)),
2052 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2053 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2054 haveScrewPBC(ir.ePBC == epbcSCREW)
2058 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2059 static bool
2060 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2061 const bool useUpdateGroups,
2062 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2064 if (useUpdateGroups)
2066 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2067 "Need one grouping per moltype");
2068 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2070 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2072 return false;
2076 else
2078 for (const auto &moltype : mtop.moltype)
2080 if (moltype.atoms.nr > 1)
2082 return false;
2087 return true;
2090 /*! \brief Generate the simulation system information */
2091 static DDSystemInfo
2092 getSystemInfo(const gmx::MDLogger &mdlog,
2093 const t_commrec *cr,
2094 const DomdecOptions &options,
2095 const gmx_mtop_t &mtop,
2096 const t_inputrec &ir,
2097 const matrix box,
2098 gmx::ArrayRef<const gmx::RVec> xGlobal)
2100 const real tenPercentMargin = 1.1;
2102 DDSystemInfo systemInfo;
2104 /* We need to decide on update groups early, as this affects communication distances */
2105 systemInfo.useUpdateGroups = false;
2106 if (ir.cutoff_scheme == ecutsVERLET)
2108 real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2109 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2112 systemInfo.moleculesAreAlwaysWhole =
2113 moleculesAreAlwaysWhole(mtop,
2114 systemInfo.useUpdateGroups,
2115 systemInfo.updateGroupingPerMoleculetype);
2116 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2117 mtop.bIntermolecularInteractions);
2118 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2119 multi_body_bondeds_count(&mtop) > 0);
2121 if (systemInfo.useUpdateGroups)
2123 systemInfo.haveSplitConstraints = false;
2124 systemInfo.haveSplitSettles = false;
2126 else
2128 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2129 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2130 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2133 if (ir.rlist == 0)
2135 /* Set the cut-off to some very large value,
2136 * so we don't need if statements everywhere in the code.
2137 * We use sqrt, since the cut-off is squared in some places.
2139 systemInfo.cutoff = GMX_CUTOFF_INF;
2141 else
2143 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2145 systemInfo.minCutoffForMultiBody = 0;
2147 /* Determine the minimum cell size limit, affected by many factors */
2148 systemInfo.cellsizeLimit = 0;
2149 systemInfo.filterBondedCommunication = false;
2151 /* We do not allow home atoms to move beyond the neighboring domain
2152 * between domain decomposition steps, which limits the cell size.
2153 * Get an estimate of cell size limit due to atom displacement.
2154 * In most cases this is a large overestimate, because it assumes
2155 * non-interacting atoms.
2156 * We set the chance to 1 in a trillion steps.
2158 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2159 const real limitForAtomDisplacement =
2160 minCellSizeForAtomDisplacement(mtop, ir,
2161 systemInfo.updateGroupingPerMoleculetype,
2162 c_chanceThatAtomMovesBeyondDomain);
2163 GMX_LOG(mdlog.info).appendTextFormatted(
2164 "Minimum cell size due to atom displacement: %.3f nm",
2165 limitForAtomDisplacement);
2167 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2168 limitForAtomDisplacement);
2170 /* TODO: PME decomposition currently requires atoms not to be more than
2171 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2172 * In nearly all cases, limitForAtomDisplacement will be smaller
2173 * than 2/3*rlist, so the PME requirement is satisfied.
2174 * But it would be better for both correctness and performance
2175 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2176 * Note that we would need to improve the pairlist buffer case.
2179 if (systemInfo.haveInterDomainBondeds)
2181 if (options.minimumCommunicationRange > 0)
2183 systemInfo.minCutoffForMultiBody =
2184 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2185 if (options.useBondedCommunication)
2187 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2189 else
2191 systemInfo.cutoff = std::max(systemInfo.cutoff,
2192 systemInfo.minCutoffForMultiBody);
2195 else if (ir.bPeriodicMols)
2197 /* Cannot easily determine the required cut-off */
2198 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2199 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2201 else
2203 real r_2b, r_mb;
2205 if (MASTER(cr))
2207 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2208 options.checkBondedInteractions,
2209 &r_2b, &r_mb);
2211 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2212 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2214 /* We use an initial margin of 10% for the minimum cell size,
2215 * except when we are just below the non-bonded cut-off.
2217 if (options.useBondedCommunication)
2219 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2221 const real r_bonded = std::max(r_2b, r_mb);
2222 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2223 /* This is the (only) place where we turn on the filtering */
2224 systemInfo.filterBondedCommunication = true;
2226 else
2228 const real r_bonded = r_mb;
2229 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2230 systemInfo.cutoff);
2232 /* We determine cutoff_mbody later */
2233 systemInfo.increaseMultiBodyCutoff = true;
2235 else
2237 /* No special bonded communication,
2238 * simply increase the DD cut-off.
2240 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2241 systemInfo.cutoff = std::max(systemInfo.cutoff,
2242 systemInfo.minCutoffForMultiBody);
2245 GMX_LOG(mdlog.info).appendTextFormatted(
2246 "Minimum cell size due to bonded interactions: %.3f nm",
2247 systemInfo.minCutoffForMultiBody);
2249 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2250 systemInfo.minCutoffForMultiBody);
2253 systemInfo.constraintCommunicationRange = 0;
2254 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2256 /* There is a cell size limit due to the constraints (P-LINCS) */
2257 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2258 GMX_LOG(mdlog.info).appendTextFormatted(
2259 "Estimated maximum distance required for P-LINCS: %.3f nm",
2260 systemInfo.constraintCommunicationRange);
2261 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2263 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2266 else if (options.constraintCommunicationRange > 0)
2268 /* Here we do not check for systemInfo.haveSplitConstraints,
2269 * because one can also set a cell size limit for virtual sites only,
2270 * and at this point we don't know yet if there are inter-cg v-sites.
2272 GMX_LOG(mdlog.info).appendTextFormatted(
2273 "User supplied maximum distance required for P-LINCS: %.3f nm",
2274 options.constraintCommunicationRange);
2275 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2277 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2278 systemInfo.constraintCommunicationRange);
2280 return systemInfo;
2283 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2284 * implemented. */
2285 static void
2286 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2287 const t_commrec *cr,
2288 const DomdecOptions &options,
2289 const DDSettings &ddSettings,
2290 const DDSystemInfo &systemInfo,
2291 const real cellsizeLimit,
2292 const gmx_ddbox_t &ddbox)
2294 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2296 char buf[STRLEN];
2297 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2298 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2299 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2300 !bC ? "-rdd" : "-rcon",
2301 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2302 bC ? " or your LINCS settings" : "");
2304 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2305 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2306 "%s\n"
2307 "Look in the log file for details on the domain decomposition",
2308 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2309 cellsizeLimit,
2310 buf);
2313 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2314 if (acs < cellsizeLimit)
2316 if (options.numCells[XX] <= 0)
2318 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2320 else
2322 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2323 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2324 acs, cellsizeLimit);
2328 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2329 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2331 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2332 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2333 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2335 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2337 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2338 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2342 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2343 static DDRankSetup
2344 getDDRankSetup(const gmx::MDLogger &mdlog,
2345 t_commrec *cr,
2346 const DDGridSetup &ddGridSetup,
2347 const t_inputrec &ir)
2349 GMX_LOG(mdlog.info).appendTextFormatted(
2350 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2351 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2352 ddGridSetup.numPmeOnlyRanks);
2354 DDRankSetup ddRankSetup;
2356 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2357 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2359 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2360 if (ddRankSetup.usePmeOnlyRanks)
2362 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2364 else
2366 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2369 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2371 /* The following choices should match those
2372 * in comm_cost_est in domdec_setup.c.
2373 * Note that here the checks have to take into account
2374 * that the decomposition might occur in a different order than xyz
2375 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2376 * in which case they will not match those in comm_cost_est,
2377 * but since that is mainly for testing purposes that's fine.
2379 if (ddGridSetup.numDDDimensions >= 2 &&
2380 ddGridSetup.ddDimensions[0] == XX &&
2381 ddGridSetup.ddDimensions[1] == YY &&
2382 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2383 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2384 getenv("GMX_PMEONEDD") == nullptr)
2386 ddRankSetup.npmedecompdim = 2;
2387 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2388 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2390 else
2392 /* In case nc is 1 in both x and y we could still choose to
2393 * decompose pme in y instead of x, but we use x for simplicity.
2395 ddRankSetup.npmedecompdim = 1;
2396 if (ddGridSetup.ddDimensions[0] == YY)
2398 ddRankSetup.npmenodes_x = 1;
2399 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2401 else
2403 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2404 ddRankSetup.npmenodes_y = 1;
2407 GMX_LOG(mdlog.info).appendTextFormatted(
2408 "PME domain decomposition: %d x %d x %d",
2409 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2411 else
2413 ddRankSetup.npmedecompdim = 0;
2414 ddRankSetup.npmenodes_x = 0;
2415 ddRankSetup.npmenodes_y = 0;
2418 return ddRankSetup;
2421 /*! \brief Set the cell size and interaction limits */
2422 static void set_dd_limits(const gmx::MDLogger &mdlog,
2423 t_commrec *cr, gmx_domdec_t *dd,
2424 const DomdecOptions &options,
2425 const DDSettings &ddSettings,
2426 const DDSystemInfo &systemInfo,
2427 const DDGridSetup &ddGridSetup,
2428 const int numPPRanks,
2429 const gmx_mtop_t *mtop,
2430 const t_inputrec *ir,
2431 const gmx_ddbox_t &ddbox)
2433 gmx_domdec_comm_t *comm = dd->comm;
2434 comm->ddSettings = ddSettings;
2436 /* Initialize the GPU share count to 0; it might change later */
2437 comm->nrank_gpu_shared = 0;
2439 comm->dlbState = comm->ddSettings.initialDlbState;
2440 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2441 /* To consider turning DLB on after 2*nstlist steps we need to check
2442 * at partitioning count 3. Thus we need to increase the first count by 2.
2444 comm->ddPartioningCountFirstDlbOff += 2;
2446 comm->bPMELoadBalDLBLimits = FALSE;
2448 /* Allocate the charge group/atom sorting struct */
2449 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2451 comm->systemInfo = systemInfo;
2453 if (systemInfo.useUpdateGroups)
2455 /* Note: We would like to use dd->nnodes for the atom count estimate,
2456 * but that is not yet available here. This anyhow only affects
2457 * performance up to the second dd_partition_system call.
2459 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2460 comm->updateGroupsCog =
2461 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2462 systemInfo.updateGroupingPerMoleculetype,
2463 maxReferenceTemperature(*ir),
2464 homeAtomCountEstimate);
2467 /* Set the DD setup given by ddGridSetup */
2468 copy_ivec(ddGridSetup.numDomains, dd->nc);
2469 dd->ndim = ddGridSetup.numDDDimensions;
2470 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2472 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2474 snew(comm->slb_frac, DIM);
2475 if (isDlbDisabled(comm))
2477 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2478 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2479 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2482 /* Set the multi-body cut-off and cellsize limit for DLB */
2483 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2484 comm->cellsize_limit = systemInfo.cellsizeLimit;
2485 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2487 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2489 /* Set the bonded communication distance to halfway between
2490 * the minimum and the maximum,
2491 * since the extra communication cost is nearly zero.
2493 real acs = average_cellsize_min(ddbox, dd->nc);
2494 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2495 if (!isDlbDisabled(comm))
2497 /* Check if this does not limit the scaling */
2498 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2499 options.dlbScaling*acs);
2501 if (!systemInfo.filterBondedCommunication)
2503 /* Without bonded-communication filtering, do not go beyond the n.b. cut-off */
2504 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2505 if (comm->cellsize_limit >= systemInfo.cutoff)
2507 /* We don't lose a lot of efficiency
2508 * when increasing it to the n.b. cut-off.
2509 * It can even be slightly faster, because we need
2510 * fewer checks for the communication setup.
2512 comm->cutoff_mbody = systemInfo.cutoff;
2515 /* Check if we did not end up below our original limit */
2516 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2517 systemInfo.minCutoffForMultiBody);
2519 if (comm->cutoff_mbody > comm->cellsize_limit)
2521 comm->cellsize_limit = comm->cutoff_mbody;
2524 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2527 if (debug)
2529 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2530 "cellsize limit %f\n",
2531 gmx::boolToString(systemInfo.filterBondedCommunication),
2532 comm->cellsize_limit);
2535 if (MASTER(cr))
2537 check_dd_restrictions(dd, ir, mdlog);
2541 void dd_init_bondeds(FILE *fplog,
2542 gmx_domdec_t *dd,
2543 const gmx_mtop_t *mtop,
2544 const gmx_vsite_t *vsite,
2545 const t_inputrec *ir,
2546 gmx_bool bBCheck,
2547 cginfo_mb_t *cginfo_mb)
2549 gmx_domdec_comm_t *comm;
2551 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2553 comm = dd->comm;
2555 if (comm->systemInfo.filterBondedCommunication)
2557 /* Communicate atoms beyond the cut-off for bonded interactions */
2558 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2560 else
2562 /* Only communicate atoms based on cut-off */
2563 comm->bondedLinks = nullptr;
2567 static void writeSettings(gmx::TextWriter *log,
2568 gmx_domdec_t *dd,
2569 const gmx_mtop_t *mtop,
2570 const t_inputrec *ir,
2571 gmx_bool bDynLoadBal,
2572 real dlb_scale,
2573 const gmx_ddbox_t *ddbox)
2575 gmx_domdec_comm_t *comm;
2576 int d;
2577 ivec np;
2578 real limit, shrink;
2580 comm = dd->comm;
2582 if (bDynLoadBal)
2584 log->writeString("The maximum number of communication pulses is:");
2585 for (d = 0; d < dd->ndim; d++)
2587 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2589 log->ensureLineBreak();
2590 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2591 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2592 log->writeString("The allowed shrink of domain decomposition cells is:");
2593 for (d = 0; d < DIM; d++)
2595 if (dd->nc[d] > 1)
2597 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2599 shrink = 0;
2601 else
2603 shrink =
2604 comm->cellsize_min_dlb[d]/
2605 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2607 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2610 log->ensureLineBreak();
2612 else
2614 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2615 log->writeString("The initial number of communication pulses is:");
2616 for (d = 0; d < dd->ndim; d++)
2618 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2620 log->ensureLineBreak();
2621 log->writeString("The initial domain decomposition cell size is:");
2622 for (d = 0; d < DIM; d++)
2624 if (dd->nc[d] > 1)
2626 log->writeStringFormatted(" %c %.2f nm",
2627 dim2char(d), dd->comm->cellsize_min[d]);
2630 log->ensureLineBreak();
2631 log->writeLine();
2634 const bool haveInterDomainVsites =
2635 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2637 if (comm->systemInfo.haveInterDomainBondeds ||
2638 haveInterDomainVsites ||
2639 comm->systemInfo.haveSplitConstraints ||
2640 comm->systemInfo.haveSplitSettles)
2642 std::string decompUnits;
2643 if (comm->systemInfo.useUpdateGroups)
2645 decompUnits = "atom groups";
2647 else
2649 decompUnits = "atoms";
2652 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2653 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2655 if (bDynLoadBal)
2657 limit = dd->comm->cellsize_limit;
2659 else
2661 if (dd->unitCellInfo.ddBoxIsDynamic)
2663 log->writeLine("(the following are initial values, they could change due to box deformation)");
2665 limit = dd->comm->cellsize_min[XX];
2666 for (d = 1; d < DIM; d++)
2668 limit = std::min(limit, dd->comm->cellsize_min[d]);
2672 if (comm->systemInfo.haveInterDomainBondeds)
2674 log->writeLineFormatted("%40s %-7s %6.3f nm",
2675 "two-body bonded interactions", "(-rdd)",
2676 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2677 log->writeLineFormatted("%40s %-7s %6.3f nm",
2678 "multi-body bonded interactions", "(-rdd)",
2679 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2681 if (haveInterDomainVsites)
2683 log->writeLineFormatted("%40s %-7s %6.3f nm",
2684 "virtual site constructions", "(-rcon)", limit);
2686 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2688 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2689 1+ir->nProjOrder);
2690 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2691 separation.c_str(), "(-rcon)", limit);
2693 log->ensureLineBreak();
2697 static void logSettings(const gmx::MDLogger &mdlog,
2698 gmx_domdec_t *dd,
2699 const gmx_mtop_t *mtop,
2700 const t_inputrec *ir,
2701 real dlb_scale,
2702 const gmx_ddbox_t *ddbox)
2704 gmx::StringOutputStream stream;
2705 gmx::TextWriter log(&stream);
2706 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2707 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2710 log.ensureEmptyLine();
2711 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2713 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2715 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2718 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2719 gmx_domdec_t *dd,
2720 real dlb_scale,
2721 const t_inputrec *ir,
2722 const gmx_ddbox_t *ddbox)
2724 gmx_domdec_comm_t *comm;
2725 int d, dim, npulse, npulse_d_max, npulse_d;
2726 gmx_bool bNoCutOff;
2728 comm = dd->comm;
2730 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2732 /* Determine the maximum number of comm. pulses in one dimension */
2734 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2736 /* Determine the maximum required number of grid pulses */
2737 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2739 /* Only a single pulse is required */
2740 npulse = 1;
2742 else if (!bNoCutOff && comm->cellsize_limit > 0)
2744 /* We round down slightly here to avoid overhead due to the latency
2745 * of extra communication calls when the cut-off
2746 * would be only slightly longer than the cell size.
2747 * Later cellsize_limit is redetermined,
2748 * so we cannot miss interactions due to this rounding.
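 * For example, cutoff/cellsize_limit = 2.03 gives int(2.03 + 0.96) = 2 pulses
 * instead of 3, while 2.05 already gives int(3.01) = 3.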
2750 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2752 else
2754 /* There is no cell size limit */
2755 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2758 if (!bNoCutOff && npulse > 1)
2760 /* See if we can do with less pulses, based on dlb_scale */
2761 npulse_d_max = 0;
2762 for (d = 0; d < dd->ndim; d++)
2764 dim = dd->dim[d];
2765 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2766 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2767 npulse_d_max = std::max(npulse_d_max, npulse_d);
2769 npulse = std::min(npulse, npulse_d_max);
2772 /* This env var can override npulse */
2773 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2774 if (d > 0)
2776 npulse = d;
2779 comm->maxpulse = 1;
2780 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2781 for (d = 0; d < dd->ndim; d++)
2783 if (comm->ddSettings.request1DAnd1Pulse)
2785 comm->cd[d].np_dlb = 1;
2787 else
2789 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2790 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2792 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2794 comm->bVacDLBNoLimit = FALSE;
2798 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2799 if (!comm->bVacDLBNoLimit)
2801 comm->cellsize_limit = std::max(comm->cellsize_limit,
2802 comm->systemInfo.cutoff/comm->maxpulse);
2804 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2805 /* Set the minimum cell size for each DD dimension */
2806 for (d = 0; d < dd->ndim; d++)
2808 if (comm->bVacDLBNoLimit ||
2809 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2811 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2813 else
2815 comm->cellsize_min_dlb[dd->dim[d]] =
2816 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2819 if (comm->cutoff_mbody <= 0)
2821 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2823 if (isDlbOn(comm))
2825 set_dlb_limits(dd);
2829 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2831 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2834 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2836 /* If each molecule is a single charge group
2837 * or we use domain decomposition for each periodic dimension,
2838 * we do not need to take pbc into account for the bonded interactions.
2840 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2841 !(dd->nc[XX] > 1 &&
2842 dd->nc[YY] > 1 &&
2843 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2846 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2847 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2848 gmx_domdec_t *dd, real dlb_scale,
2849 const gmx_mtop_t *mtop, const t_inputrec *ir,
2850 const gmx_ddbox_t *ddbox)
2852 gmx_domdec_comm_t *comm = dd->comm;
2853 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2855 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2857 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2858 if (ddRankSetup.npmedecompdim >= 2)
2860 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2863 else
2865 ddRankSetup.numRanksDoingPme = 0;
2866 if (dd->pme_nodeid >= 0)
2868 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2869 "Can not have separate PME ranks without PME electrostatics");
2873 if (debug)
2875 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2877 if (!isDlbDisabled(comm))
2879 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2882 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2884 real vol_frac;
2885 if (ir->ePBC == epbcNONE)
2887 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2889 else
2891 vol_frac =
2892 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2894 if (debug)
2896 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2898 int natoms_tot = mtop->natoms;
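/* The global-to-local atom lookup is sized from the estimated fraction of the
 * system volume covered by the DD zones of one rank, which should keep
 * resizing of the lookup during the run rare.
 */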
2900 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2901 static_cast<int>(vol_frac*natoms_tot));
2904 /*! \brief Get some important DD parameters which can be modified by env.vars */
2905 static DDSettings
2906 getDDSettings(const gmx::MDLogger &mdlog,
2907 const DomdecOptions &options,
2908 const gmx::MdrunOptions &mdrunOptions,
2909 const t_inputrec &ir)
2911 DDSettings ddSettings;
2913 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2914 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2915 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2916 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2917 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2918 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2919 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2920 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2921 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2922 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
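/* All of the settings read above are developer/debugging overrides taken from
 * environment variables; in a normal run they keep their defaults. The initial
 * DLB state below additionally depends on the user-visible -dlb and mdrun
 * options.
 */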
2924 if (ddSettings.useSendRecv2)
2926 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2929 if (ddSettings.eFlop)
2931 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2932 ddSettings.recordLoad = true;
2934 else
2936 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2939 ddSettings.initialDlbState =
2940 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2941 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2942 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2944 return ddSettings;
2947 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2948 unitCellInfo(ir)
2952 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2954 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2955 * generally requires a larger box than other possible decompositions
2956 * with the same rank count, so the calling code might need to decide
2957 * what is the most appropriate way to run the simulation based on
2958 * whether such a DD is possible.
2960 * This function works like init_domain_decomposition(), but will not
2961 * give a fatal error, and only uses \c cr for communicating between
2962 * ranks.
2964 * It is safe to call before thread-MPI spawns ranks, so that
2965 * thread-MPI can decide whether and how to trigger the GPU halo
2966 * exchange code path. The number of PME ranks, if any, should be set
2967 * in \c options.numPmeRanks.
2969 static bool
2970 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2971 const t_commrec *cr,
2972 const int numRanksRequested,
2973 const DomdecOptions &options,
2974 const gmx_mtop_t &mtop,
2975 const t_inputrec &ir,
2976 const matrix box,
2977 gmx::ArrayRef<const gmx::RVec> xGlobal)
2979 // Ensure we don't write any output from this checking routine
2980 gmx::MDLogger dummyLogger;
2982 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2984 DDSettings ddSettings = ddSettingsOriginal;
2985 ddSettings.request1DAnd1Pulse = true;
2986 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2987 !isDlbDisabled(ddSettings.initialDlbState),
2988 options.dlbScaling, ir,
2989 systemInfo.cellsizeLimit);
2990 gmx_ddbox_t ddbox = {0};
2991 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2992 ddSettings, systemInfo, gridSetupCellsizeLimit,
2993 mtop, ir, box, xGlobal, &ddbox);
2995 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2997 return canMakeDDWith1DAnd1Pulse;
3000 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
3002 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
3003 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
3004 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
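// For example, nc = {4,1,1}: max = 4 = product, so one decomposition dimension;
// nc = {2,2,1}: max = 2 but product = 4, so more than one dimension.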
3006 const bool hasMax1Pulse =
3007 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3008 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3010 return decompositionHasOneDimension && hasMax1Pulse;
3014 namespace gmx
3017 // TODO once the functionality stabilizes, move this class and
3018 // supporting functionality into builder.cpp
3019 /*! \brief Impl class for DD builder */
3020 class DomainDecompositionBuilder::Impl
3022 public:
3023 //! Constructor
3024 Impl(const MDLogger &mdlog,
3025 t_commrec *cr,
3026 const DomdecOptions &options,
3027 const MdrunOptions &mdrunOptions,
3028 bool prefer1DAnd1Pulse,
3029 const gmx_mtop_t &mtop,
3030 const t_inputrec &ir,
3031 const matrix box,
3032 ArrayRef<const RVec> xGlobal);
3034 //! Build the resulting DD manager
3035 gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3037 //! Objects used in constructing and configuring DD
3038 //! {
3039 //! Logging object
3040 const MDLogger &mdlog_;
3041 //! Communication object
3042 t_commrec *cr_;
3043 //! User-supplied options configuring DD behavior
3044 const DomdecOptions options_;
3045 //! Global system topology
3046 const gmx_mtop_t &mtop_;
3047 //! User input values from the tpr file
3048 const t_inputrec &ir_;
3049 //! }
3051 //! Internal objects used in constructing DD
3052 //! {
3053 //! Settings combined from the user input
3054 DDSettings ddSettings_;
3055 //! Information derived from the simulation system
3056 DDSystemInfo systemInfo_;
3057 //! Box structure
3058 gmx_ddbox_t ddbox_ = { 0 };
3059 //! Organization of the DD grids
3060 DDGridSetup ddGridSetup_;
3061 //! Organization of the DD ranks
3062 DDRankSetup ddRankSetup_;
3063 //! Number of DD cells in each dimension
3064 ivec ddCellIndex_ = { 0, 0, 0 };
3065 //! IDs of PME-only ranks
3066 std::vector<int> pmeRanks_;
3067 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3068 CartesianRankSetup cartSetup_;
3069 //! }
3073 DomainDecompositionBuilder::Impl::Impl(const MDLogger &mdlog,
3074 t_commrec *cr,
3075 const DomdecOptions &options,
3076 const MdrunOptions &mdrunOptions,
3077 const bool prefer1DAnd1Pulse,
3078 const gmx_mtop_t &mtop,
3079 const t_inputrec &ir,
3080 const matrix box,
3081 ArrayRef<const RVec> xGlobal)
3082 : mdlog_(mdlog),
3083 cr_(cr),
3084 options_(options),
3085 mtop_(mtop),
3086 ir_(ir)
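// The construction phases below are: read the DD settings (possibly adjusted
// by environment variables), derive the system information, choose the DD
// grid, split the ranks into PP and PME duties, and create the group
// communicators.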
3088 GMX_LOG(mdlog_.info).appendTextFormatted(
3089 "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3091 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3093 if (prefer1DAnd1Pulse &&
3094 canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3095 mtop_, ir_, box, xGlobal))
3097 ddSettings_.request1DAnd1Pulse = true;
3100 if (ddSettings_.eFlop > 1)
3102 /* Ensure that we have different random flop counts on different ranks */
3103 srand(1 + cr_->nodeid);
3106 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3108 const int numRanksRequested = cr_->nnodes;
3109 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3111 // DD grid setup uses a different cell size limit for the
3112 // automated setup than the one in systemInfo_. The latter is used
3113 // in set_dd_limits() to configure DLB, for example.
3114 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3115 !isDlbDisabled(ddSettings_.initialDlbState),
3116 options_.dlbScaling, ir_,
3117 systemInfo_.cellsizeLimit);
3118 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3119 ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3120 mtop_, ir_, box, xGlobal, &ddbox_);
3121 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3123 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3125 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3127 /* Generate the group communicator, also decides the duty of each rank */
3128 cartSetup_ =
3129 makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3130 ddRankSetup_, cr_,
3131 ddCellIndex_, &pmeRanks_);
3134 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3136 gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3138 copy_ivec(ddCellIndex_, dd->ci);
3140 dd->comm = init_dd_comm();
3142 dd->comm->ddRankSetup = ddRankSetup_;
3143 dd->comm->cartesianRankSetup = cartSetup_;
3145 set_dd_limits(mdlog_, cr_, dd, options_,
3146 ddSettings_, systemInfo_,
3147 ddGridSetup_,
3148 ddRankSetup_.numPPRanks,
3149 &mtop_, &ir_,
3150 ddbox_);
3152 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3154 if (thisRankHasDuty(cr_, DUTY_PP))
3156 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3158 setup_neighbor_relations(dd);
3161 /* Set overallocation to avoid frequent reallocation of arrays */
3162 set_over_alloc_dd(TRUE);
3164 dd->atomSets = atomSets;
3166 return dd;
3169 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger &mdlog,
3170 t_commrec *cr,
3171 const DomdecOptions &options,
3172 const MdrunOptions &mdrunOptions,
3173 const bool prefer1DAnd1Pulse,
3174 const gmx_mtop_t &mtop,
3175 const t_inputrec &ir,
3176 const matrix box,
3177 ArrayRef<const RVec> xGlobal)
3178 : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3182 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3184 return impl_->build(atomSets);
3187 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3189 } // namespace gmx
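/* Returns whether a pair-list cut-off of cutoffRequested fits within the
 * current DD grid, i.e. whether the number of communication pulses it would
 * require along each decomposed dimension is still available on all ranks.
 */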
3191 static gmx_bool test_dd_cutoff(t_commrec *cr,
3192 const matrix box,
3193 gmx::ArrayRef<const gmx::RVec> x,
3194 real cutoffRequested)
3196 gmx_domdec_t *dd;
3197 gmx_ddbox_t ddbox;
3198 int d, dim, np;
3199 real inv_cell_size;
3200 int LocallyLimited;
3202 dd = cr->dd;
3204 set_ddbox(*dd, false, box, true, x, &ddbox);
3206 LocallyLimited = 0;
3208 for (d = 0; d < dd->ndim; d++)
3210 dim = dd->dim[d];
3212 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3213 if (dd->unitCellInfo.ddBoxIsDynamic)
3215 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3218 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
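/* np is the number of communication pulses needed along this dimension for
 * the requested cut-off: e.g. a 1.2 nm cut-off with 0.9 nm cells needs
 * 1 + int(1.2/0.9) = 2 pulses (safety margins and box skew aside).
 */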
3220 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3222 return FALSE;
3225 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3227 if (np > dd->comm->cd[d].np_dlb)
3229 return FALSE;
3232 /* If the current local cell size is smaller than the requested
3233 * cut-off, we could fix that here, but this gets very complicated.
3234 * Without fixing it here, we might actually need more checks.
3236 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3237 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3239 LocallyLimited = 1;
3244 if (!isDlbDisabled(dd->comm))
3246 /* If DLB is not active yet, we don't need to check the grid jumps.
3247 * Actually we shouldn't, because then the grid jump data is not set.
3249 if (isDlbOn(dd->comm) &&
3250 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3252 LocallyLimited = 1;
3255 gmx_sumi(1, &LocallyLimited, cr);
3257 if (LocallyLimited > 0)
3259 return FALSE;
3263 return TRUE;
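/* Checks whether the DD grid can accommodate cutoffRequested and, if so,
 * records it as the new system cut-off, which the communication setup then
 * uses at the next repartitioning.
 */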
3266 gmx_bool change_dd_cutoff(t_commrec *cr,
3267 const matrix box,
3268 gmx::ArrayRef<const gmx::RVec> x,
3269 real cutoffRequested)
3271 gmx_bool bCutoffAllowed;
3273 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3275 if (bCutoffAllowed)
3277 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3280 return bCutoffAllowed;