Make PBC type enumeration into PbcType enum class
gromacs.git / src/gromacs/domdec/domdec.cpp
1 /*
2 * This file is part of the GROMACS molecular simulation package.
    4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "domdec.h"
41 #include "config.h"
43 #include <cassert>
44 #include <cinttypes>
45 #include <climits>
46 #include <cmath>
47 #include <cstdio>
48 #include <cstdlib>
49 #include <cstring>
51 #include <algorithm>
52 #include <memory>
54 #include "gromacs/domdec/builder.h"
55 #include "gromacs/domdec/collect.h"
56 #include "gromacs/domdec/dlb.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec_network.h"
59 #include "gromacs/domdec/ga2la.h"
60 #include "gromacs/domdec/gpuhaloexchange.h"
61 #include "gromacs/domdec/options.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/gmxlib/network.h"
64 #include "gromacs/gmxlib/nrnb.h"
65 #include "gromacs/gpu_utils/gpu_utils.h"
66 #include "gromacs/hardware/hw_info.h"
67 #include "gromacs/listed_forces/manage_threading.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/calc_verletbuf.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/constraintrange.h"
73 #include "gromacs/mdlib/updategroups.h"
74 #include "gromacs/mdlib/vsite.h"
75 #include "gromacs/mdtypes/commrec.h"
76 #include "gromacs/mdtypes/forceoutput.h"
77 #include "gromacs/mdtypes/inputrec.h"
78 #include "gromacs/mdtypes/mdrunoptions.h"
79 #include "gromacs/mdtypes/state.h"
80 #include "gromacs/pbcutil/ishift.h"
81 #include "gromacs/pbcutil/pbc.h"
82 #include "gromacs/pulling/pull.h"
83 #include "gromacs/timing/wallcycle.h"
84 #include "gromacs/topology/block.h"
85 #include "gromacs/topology/idef.h"
86 #include "gromacs/topology/ifunc.h"
87 #include "gromacs/topology/mtop_lookup.h"
88 #include "gromacs/topology/mtop_util.h"
89 #include "gromacs/topology/topology.h"
90 #include "gromacs/utility/basedefinitions.h"
91 #include "gromacs/utility/basenetwork.h"
92 #include "gromacs/utility/cstringutil.h"
93 #include "gromacs/utility/exceptions.h"
94 #include "gromacs/utility/fatalerror.h"
95 #include "gromacs/utility/gmxmpi.h"
96 #include "gromacs/utility/logger.h"
97 #include "gromacs/utility/real.h"
98 #include "gromacs/utility/smalloc.h"
99 #include "gromacs/utility/strconvert.h"
100 #include "gromacs/utility/stringstream.h"
101 #include "gromacs/utility/stringutil.h"
102 #include "gromacs/utility/textwriter.h"
104 #include "atomdistribution.h"
105 #include "box.h"
106 #include "cellsizes.h"
107 #include "distribute.h"
108 #include "domdec_constraints.h"
109 #include "domdec_internal.h"
110 #include "domdec_setup.h"
111 #include "domdec_vsite.h"
112 #include "redistribute.h"
113 #include "utility.h"
115 // TODO remove this when moving domdec into gmx namespace
116 using gmx::DdRankOrder;
117 using gmx::DlbOption;
118 using gmx::DomdecOptions;
120 static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
122 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
123 #define DD_CGIBS 2
125 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
126 #define DD_FLAG_NRCG 65535
127 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
128 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
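/* Illustrative sketch of how these flags pack into one cggl_flag entry
 * (the exact field usage lives in redistribute.cpp; this is only an example):
 *   bits  0..15 : number of atoms in the group, extracted with (flag & DD_FLAG_NRCG)
 *   bit 16 + 2d : DD_FLAG_FW(d), the group moves forward along DD dimension d
 *   bit 17 + 2d : DD_FLAG_BW(d), the group moves backward along DD dimension d
 * e.g. DD_FLAG_FW(1) expands to 1 << 18 and DD_FLAG_BW(0) to 1 << 17.
 */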
130 /* The DD zone order */
131 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
132 { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
134 /* The non-bonded zone-pair setup for domain decomposition
135 * The first number is the i-zone, the second number the first j-zone seen by
136 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
137 * As is, this is for 3D decomposition, where there are 4 i-zones.
138 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
139 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
141 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
142 { 1, 3, 6 },
143 { 2, 5, 6 },
144 { 3, 5, 7 } };
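/* Reading the table above (worked example): the row { 1, 3, 6 } means i-zone 1
 * interacts with j-zones 3, 4 and 5 in a 3D decomposition. For fewer
 * decomposition dimensions setup_neighbor_relations() clips both bounds to the
 * number of zones, so with 2D decomposition (4 zones) the same row gives the
 * j-zone range [3,4), i.e. only j-zone 3.
 */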
146 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
148 xyz[XX] = ind / (nc[YY] * nc[ZZ]);
149 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
150 xyz[ZZ] = ind % nc[ZZ];
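/* Worked example: with nc = { 4, 3, 2 } and ind = 17 this gives
 *   xyz[XX] = 17 / (3*2)   = 2
 *   xyz[YY] = (17 / 2) % 3 = 2
 *   xyz[ZZ] = 17 % 2       = 1
 * i.e. the index runs fastest along z, the inverse of dd_index().
 */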
153 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
155 int ddnodeid = -1;
157 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
158 const int ddindex = dd_index(dd->numCells, c);
159 if (cartSetup.bCartesianPP_PME)
161 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
163 else if (cartSetup.bCartesianPP)
165 #if GMX_MPI
166 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
167 #endif
169 else
171 ddnodeid = ddindex;
174 return ddnodeid;
177 int ddglatnr(const gmx_domdec_t* dd, int i)
179 int atnr;
181 if (dd == nullptr)
183 atnr = i + 1;
185 else
187 if (i >= dd->comm->atomRanges.numAtomsTotal())
189 gmx_fatal(FARGS,
190 "glatnr called with %d, which is larger than the local number of atoms (%d)",
191 i, dd->comm->atomRanges.numAtomsTotal());
193 atnr = dd->globalAtomIndices[i] + 1;
196 return atnr;
199 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
201 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
202 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
205 void dd_store_state(gmx_domdec_t* dd, t_state* state)
207 int i;
209 if (state->ddp_count != dd->ddp_count)
211 gmx_incons("The MD state does not match the domain decomposition state");
214 state->cg_gl.resize(dd->ncg_home);
215 for (i = 0; i < dd->ncg_home; i++)
217 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
220 state->ddp_count_cg_gl = dd->ddp_count;
223 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
225 return &dd->comm->zones;
228 int dd_numAtomsZones(const gmx_domdec_t& dd)
230 return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
233 int dd_numHomeAtoms(const gmx_domdec_t& dd)
235 return dd.comm->atomRanges.numHomeAtoms();
238 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
240 /* We currently set mdatoms entries for all atoms:
241 * local + non-local + communicated for vsite + constraints
244 return dd->comm->atomRanges.numAtomsTotal();
247 int dd_natoms_vsite(const gmx_domdec_t* dd)
249 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
252 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
254 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
255 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
258 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
260 wallcycle_start(wcycle, ewcMOVEX);
262 int nzone, nat_tot;
263 gmx_domdec_comm_t* comm;
264 gmx_domdec_comm_dim_t* cd;
265 rvec shift = { 0, 0, 0 };
266 gmx_bool bPBC, bScrew;
268 comm = dd->comm;
270 nzone = 1;
271 nat_tot = comm->atomRanges.numHomeAtoms();
272 for (int d = 0; d < dd->ndim; d++)
274 bPBC = (dd->ci[dd->dim[d]] == 0);
275 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
276 if (bPBC)
278 copy_rvec(box[dd->dim[d]], shift);
280 cd = &comm->cd[d];
281 for (const gmx_domdec_ind_t& ind : cd->ind)
283 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
284 gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
285 int n = 0;
286 if (!bPBC)
288 for (int j : ind.index)
290 sendBuffer[n] = x[j];
291 n++;
294 else if (!bScrew)
296 for (int j : ind.index)
298 /* We need to shift the coordinates */
299 for (int d = 0; d < DIM; d++)
301 sendBuffer[n][d] = x[j][d] + shift[d];
303 n++;
306 else
308 for (int j : ind.index)
310 /* Shift x */
311 sendBuffer[n][XX] = x[j][XX] + shift[XX];
312 /* Rotate y and z.
313 * This operation requires a special shift force
314 * treatment, which is performed in calc_vir.
316 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
317 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
318 n++;
322 DDBufferAccess<gmx::RVec> receiveBufferAccess(
323 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
325 gmx::ArrayRef<gmx::RVec> receiveBuffer;
326 if (cd->receiveInPlace)
328 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
330 else
332 receiveBuffer = receiveBufferAccess.buffer;
334 /* Send and receive the coordinates */
335 ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
337 if (!cd->receiveInPlace)
339 int j = 0;
340 for (int zone = 0; zone < nzone; zone++)
342 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
344 x[i] = receiveBuffer[j++];
348 nat_tot += ind.nrecv[nzone + 1];
350 nzone += nzone;
353 wallcycle_stop(wcycle, ewcMOVEX);
356 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
358 wallcycle_start(wcycle, ewcMOVEF);
360 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
361 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
363 gmx_domdec_comm_t& comm = *dd->comm;
364 int nzone = comm.zones.n / 2;
365 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
366 for (int d = dd->ndim - 1; d >= 0; d--)
368 /* Only forces in domains near the PBC boundaries need to
369 consider PBC in the treatment of fshift */
370 const bool shiftForcesNeedPbc =
371 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
372 const bool applyScrewPbc =
373 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
374 /* Determine which shift vector we need */
375 ivec vis = { 0, 0, 0 };
376 vis[dd->dim[d]] = 1;
377 const int is = IVEC2IS(vis);
379 /* Loop over the pulses */
380 const gmx_domdec_comm_dim_t& cd = comm.cd[d];
381 for (int p = cd.numPulses() - 1; p >= 0; p--)
383 const gmx_domdec_ind_t& ind = cd.ind[p];
384 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
385 gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
387 nat_tot -= ind.nrecv[nzone + 1];
389 DDBufferAccess<gmx::RVec> sendBufferAccess(
390 comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
392 gmx::ArrayRef<gmx::RVec> sendBuffer;
393 if (cd.receiveInPlace)
395 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
397 else
399 sendBuffer = sendBufferAccess.buffer;
400 int j = 0;
401 for (int zone = 0; zone < nzone; zone++)
403 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
405 sendBuffer[j++] = f[i];
409 /* Communicate the forces */
410 ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
411 /* Add the received forces */
412 int n = 0;
413 if (!shiftForcesNeedPbc)
415 for (int j : ind.index)
417 for (int d = 0; d < DIM; d++)
419 f[j][d] += receiveBuffer[n][d];
421 n++;
424 else if (!applyScrewPbc)
426 for (int j : ind.index)
428 for (int d = 0; d < DIM; d++)
430 f[j][d] += receiveBuffer[n][d];
432 /* Add this force to the shift force */
433 for (int d = 0; d < DIM; d++)
435 fshift[is][d] += receiveBuffer[n][d];
437 n++;
440 else
442 for (int j : ind.index)
444 /* Rotate the force */
445 f[j][XX] += receiveBuffer[n][XX];
446 f[j][YY] -= receiveBuffer[n][YY];
447 f[j][ZZ] -= receiveBuffer[n][ZZ];
448 if (shiftForcesNeedPbc)
450 /* Add this force to the shift force */
451 for (int d = 0; d < DIM; d++)
453 fshift[is][d] += receiveBuffer[n][d];
456 n++;
460 nzone /= 2;
462 wallcycle_stop(wcycle, ewcMOVEF);
465 /* Convenience function for extracting a real buffer from an rvec buffer
467 * To reduce the number of temporary communication buffers and avoid
468 * cache polution, we reuse gmx::RVec buffers for storing reals.
469 * This functions return a real buffer reference with the same number
470 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
472 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
474 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
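/* Example: an ArrayRef<RVec> with 100 elements (300 reals of storage) yields
 * an ArrayRef<real> with 100 elements, i.e. only the first third of the
 * underlying storage is exposed. The callers below therefore provision the
 * RVec buffers with as many elements as they need reals.
 */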
477 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
479 int nzone, nat_tot;
480 gmx_domdec_comm_t* comm;
481 gmx_domdec_comm_dim_t* cd;
483 comm = dd->comm;
485 nzone = 1;
486 nat_tot = comm->atomRanges.numHomeAtoms();
487 for (int d = 0; d < dd->ndim; d++)
489 cd = &comm->cd[d];
490 for (const gmx_domdec_ind_t& ind : cd->ind)
492 /* Note: We provision for RVec instead of real, so a factor of 3
493 * more than needed. The buffer actually already has this size
494 * and we pass a plain pointer below, so this does not matter.
496 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
497 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
498 int n = 0;
499 for (int j : ind.index)
501 sendBuffer[n++] = v[j];
504 DDBufferAccess<gmx::RVec> receiveBufferAccess(
505 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
507 gmx::ArrayRef<real> receiveBuffer;
508 if (cd->receiveInPlace)
510 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
512 else
514 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
516 /* Send and receive the data */
517 ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
518 if (!cd->receiveInPlace)
520 int j = 0;
521 for (int zone = 0; zone < nzone; zone++)
523 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
525 v[i] = receiveBuffer[j++];
529 nat_tot += ind.nrecv[nzone + 1];
531 nzone += nzone;
535 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
537 int nzone, nat_tot;
538 gmx_domdec_comm_t* comm;
539 gmx_domdec_comm_dim_t* cd;
541 comm = dd->comm;
543 nzone = comm->zones.n / 2;
544 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
545 for (int d = dd->ndim - 1; d >= 0; d--)
547 cd = &comm->cd[d];
548 for (int p = cd->numPulses() - 1; p >= 0; p--)
550 const gmx_domdec_ind_t& ind = cd->ind[p];
552 /* Note: We provision for RVec instead of real, so a factor of 3
553 * more than needed. The buffer actually already has this size
554 * and we typecast, so this works as intended.
556 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
557 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
558 nat_tot -= ind.nrecv[nzone + 1];
560 DDBufferAccess<gmx::RVec> sendBufferAccess(
561 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
563 gmx::ArrayRef<real> sendBuffer;
564 if (cd->receiveInPlace)
566 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
568 else
570 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
571 int j = 0;
572 for (int zone = 0; zone < nzone; zone++)
574 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
576 sendBuffer[j++] = v[i];
  580             /* Communicate the data */
581 ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
  582             /* Add the received values */
583 int n = 0;
584 for (int j : ind.index)
586 v[j] += receiveBuffer[n];
587 n++;
590 nzone /= 2;
594 real dd_cutoff_multibody(const gmx_domdec_t* dd)
596 const gmx_domdec_comm_t& comm = *dd->comm;
597 const DDSystemInfo& systemInfo = comm.systemInfo;
599 real r = -1;
600 if (systemInfo.haveInterDomainMultiBodyBondeds)
602 if (comm.cutoff_mbody > 0)
604 r = comm.cutoff_mbody;
606 else
608 /* cutoff_mbody=0 means we do not have DLB */
609 r = comm.cellsize_min[dd->dim[0]];
610 for (int di = 1; di < dd->ndim; di++)
612 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
614 if (comm.systemInfo.filterBondedCommunication)
616 r = std::max(r, comm.cutoff_mbody);
618 else
620 r = std::min(r, systemInfo.cutoff);
625 return r;
628 real dd_cutoff_twobody(const gmx_domdec_t* dd)
630 real r_mb;
632 r_mb = dd_cutoff_multibody(dd);
634 return std::max(dd->comm->systemInfo.cutoff, r_mb);
638 static void dd_cart_coord2pmecoord(const DDRankSetup& ddRankSetup,
639 const CartesianRankSetup& cartSetup,
640 const ivec coord,
641 ivec coord_pme)
643 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
644 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
645 copy_ivec(coord, coord_pme);
646 coord_pme[cartSetup.cartpmedim] =
647 nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
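/* Worked example (hypothetical layout): with 4 PP cells and 2 PME-only layers
 * along the PME dimension (nc = 4, ntot = 6), PP coordinates 0 and 1 map to
 * PME coordinate 4 and PP coordinates 2 and 3 map to PME coordinate 5, so the
 * PME-only layers are shared evenly between the PP layers.
 */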
650 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
651 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
653 const int npp = ddRankSetup.numPPRanks;
654 const int npme = ddRankSetup.numRanksDoingPme;
656 /* Here we assign a PME node to communicate with this DD node
657 * by assuming that the major index of both is x.
658 * We add npme/2 to obtain an even distribution.
660 return (ddCellIndex * npme + npme / 2) / npp;
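/* Worked example (hypothetical counts): with npp = 6 and npme = 2 the mapping
 * is (ddCellIndex*2 + 1)/6, so PP cells 0-2 use PME rank index 0 and PP cells
 * 3-5 use PME rank index 1.
 */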
663 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
665 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
667 int n = 0;
668 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
670 const int p0 = ddindex2pmeindex(ddRankSetup, i);
671 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
672 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
674 if (debug)
676 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
678 pmeRanks[n] = i + 1 + n;
679 n++;
683 return pmeRanks;
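/* Worked example (hypothetical counts): with 6 PP ranks and 2 PME-only ranks
 * this yields pmeRanks = { 3, 7 }, i.e. the global rank order is
 * PP PP PP PME PP PP PP PME, matching DdRankOrder::interleave.
 */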
686 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
688 gmx_domdec_t* dd;
689 ivec coords;
690 int slab;
692 dd = cr->dd;
693 coords[XX] = x;
694 coords[YY] = y;
695 coords[ZZ] = z;
696 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
698 return slab;
701 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
703 const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
704 ivec coords = { x, y, z };
705 int nodeid = -1;
707 if (cartSetup.bCartesianPP_PME)
709 #if GMX_MPI
710 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
711 #endif
713 else
715 int ddindex = dd_index(cr->dd->numCells, coords);
716 if (cartSetup.bCartesianPP)
718 nodeid = cartSetup.ddindex2simnodeid[ddindex];
720 else
722 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
724 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
726 else
728 nodeid = ddindex;
733 return nodeid;
736 static int dd_simnode2pmenode(const DDRankSetup& ddRankSetup,
737 const CartesianRankSetup& cartSetup,
738 gmx::ArrayRef<const int> pmeRanks,
739 const t_commrec gmx_unused* cr,
740 const int sim_nodeid)
742 int pmenode = -1;
744 /* This assumes a uniform x domain decomposition grid cell size */
745 if (cartSetup.bCartesianPP_PME)
747 #if GMX_MPI
748 ivec coord, coord_pme;
749 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
750 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
752 /* This is a PP rank */
753 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
754 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
756 #endif
758 else if (cartSetup.bCartesianPP)
760 if (sim_nodeid < ddRankSetup.numPPRanks)
762 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
765 else
767 /* This assumes DD cells with identical x coordinates
768 * are numbered sequentially.
770 if (pmeRanks.empty())
772 if (sim_nodeid < ddRankSetup.numPPRanks)
774 /* The DD index equals the nodeid */
775 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
778 else
780 int i = 0;
781 while (sim_nodeid > pmeRanks[i])
783 i++;
785 if (sim_nodeid < pmeRanks[i])
787 pmenode = pmeRanks[i];
792 return pmenode;
795 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
797 if (dd != nullptr)
799 return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
801 else
803 return { 1, 1 };
807 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
809 const DDRankSetup& ddRankSetup = cr->dd->comm->ddRankSetup;
810 const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
811 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
812 "This function should only be called when PME-only ranks are in use");
813 std::vector<int> ddranks;
814 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
816 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
818 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
820 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
822 if (cartSetup.bCartesianPP_PME)
824 ivec coord = { x, y, z };
825 ivec coord_pme;
826 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
827 if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
828 && cr->dd->ci[ZZ] == coord_pme[ZZ])
830 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
833 else
835 /* The slab corresponds to the nodeid in the PME group */
836 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
838 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
844 return ddranks;
847 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
849 gmx_bool bReceive = TRUE;
851 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
852 if (ddRankSetup.usePmeOnlyRanks)
854 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
855 if (cartSetup.bCartesianPP_PME)
857 #if GMX_MPI
858 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
859 ivec coords;
860 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
861 coords[cartSetup.cartpmedim]++;
862 if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
864 int rank;
865 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
866 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
868 /* This is not the last PP node for pmenode */
869 bReceive = FALSE;
872 #else
873 GMX_RELEASE_ASSERT(
874 false,
875 "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
876 #endif
878 else
880 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
881 if (cr->sim_nodeid + 1 < cr->nnodes
882 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
884 /* This is not the last PP node for pmenode */
885 bReceive = FALSE;
890 return bReceive;
893 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
895 gmx_domdec_comm_t* comm;
896 int i;
898 comm = dd->comm;
900 snew(*dim_f, dd->numCells[dim] + 1);
901 (*dim_f)[0] = 0;
902 for (i = 1; i < dd->numCells[dim]; i++)
904 if (comm->slb_frac[dim])
906 (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
908 else
910 (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
913 (*dim_f)[dd->numCells[dim]] = 1;
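/* Example boundaries (illustrative): with numCells[dim] = 4 and no slb_frac
 * this gives dim_f = { 0, 0.25, 0.5, 0.75, 1 }; with slb_frac[dim] =
 * { 0.3, 0.3, 0.2, 0.2 } it gives { 0, 0.3, 0.6, 0.8, 1 }.
 */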
916 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
918 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
920 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
922 ddpme->dim = YY;
924 else
926 ddpme->dim = dimind;
928 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
930 ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
932 if (ddpme->nslab <= 1)
934 return;
937 const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
938 /* Determine for each PME slab the PP location range for dimension dim */
939 snew(ddpme->pp_min, ddpme->nslab);
940 snew(ddpme->pp_max, ddpme->nslab);
941 for (int slab = 0; slab < ddpme->nslab; slab++)
943 ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
944 ddpme->pp_max[slab] = 0;
946 for (int i = 0; i < dd->nnodes; i++)
948 ivec xyz;
949 ddindex2xyz(dd->numCells, i, xyz);
950 /* For y only use our y/z slab.
951 * This assumes that the PME x grid size matches the DD grid size.
953 if (dimind == 0 || xyz[XX] == dd->ci[XX])
955 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
956 int slab;
957 if (dimind == 0)
959 slab = pmeindex / nso;
961 else
963 slab = pmeindex % ddpme->nslab;
965 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
966 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
970 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
973 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
975 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
977 if (ddRankSetup.ddpme[0].dim == XX)
979 return ddRankSetup.ddpme[0].maxshift;
981 else
983 return 0;
987 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
989 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
991 if (ddRankSetup.ddpme[0].dim == YY)
993 return ddRankSetup.ddpme[0].maxshift;
995 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
997 return ddRankSetup.ddpme[1].maxshift;
999 else
1001 return 0;
1005 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1007 return dd.comm->systemInfo.haveSplitConstraints;
1010 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1012 return dd.comm->systemInfo.useUpdateGroups;
1015 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1017 /* Note that the cycles value can be incorrect, either 0 or some
1018 * extremely large value, when our thread migrated to another core
1019 * with an unsynchronized cycle counter. If this happens less often
 1020  * than once per nstlist steps, this will not cause issues, since
1021 * we later subtract the maximum value from the sum over nstlist steps.
1022 * A zero count will slightly lower the total, but that's a small effect.
1023 * Note that the main purpose of the subtraction of the maximum value
1024 * is to avoid throwing off the load balancing when stalls occur due
 1025  * to e.g. system activity or network congestion.
1027 dd->comm->cycl[ddCycl] += cycles;
1028 dd->comm->cycl_n[ddCycl]++;
1029 if (cycles > dd->comm->cycl_max[ddCycl])
1031 dd->comm->cycl_max[ddCycl] = cycles;
1035 #if GMX_MPI
1036 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1038 MPI_Comm c_row;
1039 int dim, i, rank;
1040 ivec loc_c;
1041 gmx_bool bPartOfGroup = FALSE;
1043 dim = dd->dim[dim_ind];
1044 copy_ivec(loc, loc_c);
1045 for (i = 0; i < dd->numCells[dim]; i++)
1047 loc_c[dim] = i;
1048 rank = dd_index(dd->numCells, loc_c);
1049 if (rank == dd->rank)
1051 /* This process is part of the group */
1052 bPartOfGroup = TRUE;
1055 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1056 if (bPartOfGroup)
1058 dd->comm->mpi_comm_load[dim_ind] = c_row;
1059 if (!isDlbDisabled(dd->comm))
1061 DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1063 if (dd->ci[dim] == dd->master_ci[dim])
1065 /* This is the root process of this row */
1066 cellsizes.rowMaster = std::make_unique<RowMaster>();
1068 RowMaster& rowMaster = *cellsizes.rowMaster;
1069 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1070 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1071 rowMaster.isCellMin.resize(dd->numCells[dim]);
1072 if (dim_ind > 0)
1074 rowMaster.bounds.resize(dd->numCells[dim]);
1076 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1078 else
1080 /* This is not a root process, we only need to receive cell_f */
1081 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1084 if (dd->ci[dim] == dd->master_ci[dim])
1086 snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1090 #endif
1092 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1094 #if GMX_MPI
1095 int physicalnode_id_hash;
1096 gmx_domdec_t* dd;
1097 MPI_Comm mpi_comm_pp_physicalnode;
1099 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1101 /* Only ranks with short-ranged tasks (currently) use GPUs.
1102 * If we don't have GPUs assigned, there are no resources to share.
1104 return;
1107 physicalnode_id_hash = gmx_physicalnode_id_hash();
1109 dd = cr->dd;
1111 if (debug)
1113 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1114 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1115 physicalnode_id_hash, gpu_id);
1117 /* Split the PP communicator over the physical nodes */
1118 /* TODO: See if we should store this (before), as it's also used for
 1119  * the nodecomm summation.
1121 // TODO PhysicalNodeCommunicator could be extended/used to handle
1122 // the need for per-node per-group communicators.
1123 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1124 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1125 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1126 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1128 if (debug)
1130 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1133 /* Note that some ranks could share a GPU, while others don't */
1135 if (dd->comm->nrank_gpu_shared == 1)
1137 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1139 #else
1140 GMX_UNUSED_VALUE(cr);
1141 GMX_UNUSED_VALUE(gpu_id);
1142 #endif
1145 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1147 #if GMX_MPI
1148 int dim0, dim1, i, j;
1149 ivec loc;
1151 if (debug)
1153 fprintf(debug, "Making load communicators\n");
1156 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1157 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1159 if (dd->ndim == 0)
1161 return;
1164 clear_ivec(loc);
1165 make_load_communicator(dd, 0, loc);
1166 if (dd->ndim > 1)
1168 dim0 = dd->dim[0];
1169 for (i = 0; i < dd->numCells[dim0]; i++)
1171 loc[dim0] = i;
1172 make_load_communicator(dd, 1, loc);
1175 if (dd->ndim > 2)
1177 dim0 = dd->dim[0];
1178 for (i = 0; i < dd->numCells[dim0]; i++)
1180 loc[dim0] = i;
1181 dim1 = dd->dim[1];
1182 for (j = 0; j < dd->numCells[dim1]; j++)
1184 loc[dim1] = j;
1185 make_load_communicator(dd, 2, loc);
1190 if (debug)
1192 fprintf(debug, "Finished making load communicators\n");
1194 #endif
1197 /*! \brief Sets up the relation between neighboring domains and zones */
1198 static void setup_neighbor_relations(gmx_domdec_t* dd)
1200 int d, dim, m;
1201 ivec tmp, s;
1202 gmx_domdec_zones_t* zones;
1203 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1205 for (d = 0; d < dd->ndim; d++)
1207 dim = dd->dim[d];
1208 copy_ivec(dd->ci, tmp);
1209 tmp[dim] = (tmp[dim] + 1) % dd->numCells[dim];
1210 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1211 copy_ivec(dd->ci, tmp);
1212 tmp[dim] = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1213 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1214 if (debug)
1216 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1217 dd->neighbor[d][0], dd->neighbor[d][1]);
1221 int nzone = (1 << dd->ndim);
1222 int nizone = (1 << std::max(dd->ndim - 1, 0));
1223 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1225 zones = &dd->comm->zones;
1227 for (int i = 0; i < nzone; i++)
1229 m = 0;
1230 clear_ivec(zones->shift[i]);
1231 for (d = 0; d < dd->ndim; d++)
1233 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1237 zones->n = nzone;
1238 for (int i = 0; i < nzone; i++)
1240 for (d = 0; d < DIM; d++)
1242 s[d] = dd->ci[d] - zones->shift[i][d];
1243 if (s[d] < 0)
1245 s[d] += dd->numCells[d];
1247 else if (s[d] >= dd->numCells[d])
1249 s[d] -= dd->numCells[d];
1253 for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1255 GMX_RELEASE_ASSERT(
1256 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1257 "The first element for each ddNonbondedZonePairRanges should match its index");
1259 DDPairInteractionRanges iZone;
1260 iZone.iZoneIndex = iZoneIndex;
 1261         /* ddNonbondedZonePairRanges is set up for 3D decomposition, for fewer dimensions use only
1262 * j-zones up to nzone.
1264 iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1265 std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1266 for (dim = 0; dim < DIM; dim++)
1268 if (dd->numCells[dim] == 1)
1270 /* All shifts should be allowed */
1271 iZone.shift0[dim] = -1;
1272 iZone.shift1[dim] = 1;
1274 else
1276 /* Determine the min/max j-zone shift wrt the i-zone */
1277 iZone.shift0[dim] = 1;
1278 iZone.shift1[dim] = -1;
1279 for (int jZone : iZone.jZoneRange)
1281 int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1282 if (shift_diff < iZone.shift0[dim])
1284 iZone.shift0[dim] = shift_diff;
1286 if (shift_diff > iZone.shift1[dim])
1288 iZone.shift1[dim] = shift_diff;
1294 zones->iZones.push_back(iZone);
1297 if (!isDlbDisabled(dd->comm))
1299 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1302 if (dd->comm->ddSettings.recordLoad)
1304 make_load_communicators(dd);
1308 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1309 gmx_domdec_t* dd,
1310 t_commrec gmx_unused* cr,
1311 bool gmx_unused reorder)
1313 #if GMX_MPI
1314 gmx_domdec_comm_t* comm = dd->comm;
1315 CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1317 if (cartSetup.bCartesianPP)
1319 /* Set up cartesian communication for the particle-particle part */
1320 GMX_LOG(mdlog.info)
1321 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1322 dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
1324 ivec periods;
1325 for (int i = 0; i < DIM; i++)
1327 periods[i] = TRUE;
1329 MPI_Comm comm_cart;
1330 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
1331 &comm_cart);
1332 /* We overwrite the old communicator with the new cartesian one */
1333 cr->mpi_comm_mygroup = comm_cart;
1336 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1337 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1339 if (cartSetup.bCartesianPP_PME)
1341 /* Since we want to use the original cartesian setup for sim,
1342 * and not the one after split, we need to make an index.
1344 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1345 cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1346 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1347 /* Get the rank of the DD master,
1348 * above we made sure that the master node is a PP node.
1350 int rank;
1351 if (MASTER(cr))
1353 rank = dd->rank;
1355 else
1357 rank = 0;
1359 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1361 else if (cartSetup.bCartesianPP)
1363 if (!comm->ddRankSetup.usePmeOnlyRanks)
1365 /* The PP communicator is also
1366 * the communicator for this simulation
1368 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1370 cr->nodeid = dd->rank;
1372 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1374 /* We need to make an index to go from the coordinates
1375 * to the nodeid of this simulation.
1377 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1378 std::vector<int> buf(dd->nnodes);
1379 if (thisRankHasDuty(cr, DUTY_PP))
1381 buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1383 /* Communicate the ddindex to simulation nodeid index */
1384 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1385 cr->mpi_comm_mysim);
1387 /* Determine the master coordinates and rank.
1388 * The DD master should be the same node as the master of this sim.
1390 for (int i = 0; i < dd->nnodes; i++)
1392 if (cartSetup.ddindex2simnodeid[i] == 0)
1394 ddindex2xyz(dd->numCells, i, dd->master_ci);
1395 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1398 if (debug)
1400 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1403 else
1405 /* No Cartesian communicators */
1406 /* We use the rank in dd->comm->all as DD index */
1407 ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1408 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1409 dd->masterrank = 0;
1410 clear_ivec(dd->master_ci);
1412 #endif
1414 GMX_LOG(mdlog.info)
1415 .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1416 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1417 if (debug)
1419 fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1420 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1424 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1426 #if GMX_MPI
1427 CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1429 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1431 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1432 std::vector<int> buf(dd->nnodes);
1433 if (thisRankHasDuty(cr, DUTY_PP))
1435 buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1437 /* Communicate the ddindex to simulation nodeid index */
1438 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1439 cr->mpi_comm_mysim);
1441 #else
1442 GMX_UNUSED_VALUE(dd);
1443 GMX_UNUSED_VALUE(cr);
1444 #endif
1447 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1448 t_commrec* cr,
1449 const DdRankOrder ddRankOrder,
1450 bool gmx_unused reorder,
1451 const DDRankSetup& ddRankSetup,
1452 ivec ddCellIndex,
1453 std::vector<int>* pmeRanks)
1455 CartesianRankSetup cartSetup;
1457 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1458 cartSetup.bCartesianPP_PME = false;
1460 const ivec& numDDCells = ddRankSetup.numPPCells;
1461 /* Initially we set ntot to the number of PP cells */
1462 copy_ivec(numDDCells, cartSetup.ntot);
1464 if (cartSetup.bCartesianPP)
1466 const int numDDCellsTot = ddRankSetup.numPPRanks;
1467 bool bDiv[DIM];
1468 for (int i = 1; i < DIM; i++)
1470 bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1472 if (bDiv[YY] || bDiv[ZZ])
1474 cartSetup.bCartesianPP_PME = TRUE;
1475 /* If we have 2D PME decomposition, which is always in x+y,
1476 * we stack the PME only nodes in z.
1477 * Otherwise we choose the direction that provides the thinnest slab
1478 * of PME only nodes as this will have the least effect
1479 * on the PP communication.
1480 * But for the PME communication the opposite might be better.
1482 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1484 cartSetup.cartpmedim = ZZ;
1486 else
1488 cartSetup.cartpmedim = YY;
1490 cartSetup.ntot[cartSetup.cartpmedim] +=
1491 (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1493 else
1495 GMX_LOG(mdlog.info)
1496 .appendTextFormatted(
1497 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1498 "nx*nz (%d*%d)",
1499 ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1500 numDDCells[XX], numDDCells[ZZ]);
1501 GMX_LOG(mdlog.info)
1502 .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1506 if (cartSetup.bCartesianPP_PME)
1508 #if GMX_MPI
1509 int rank;
1510 ivec periods;
1512 GMX_LOG(mdlog.info)
1513 .appendTextFormatted(
1514 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1515 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1517 for (int i = 0; i < DIM; i++)
1519 periods[i] = TRUE;
1521 MPI_Comm comm_cart;
1522 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1523 &comm_cart);
1524 MPI_Comm_rank(comm_cart, &rank);
1525 if (MASTER(cr) && rank != 0)
1527 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 1530         /* With this assignment we lose the link to the original communicator,
 1531          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1533 cr->mpi_comm_mysim = comm_cart;
1534 cr->sim_nodeid = rank;
1536 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1538 GMX_LOG(mdlog.info)
1539 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1540 ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1542 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1544 cr->duty = DUTY_PP;
1546 if (!ddRankSetup.usePmeOnlyRanks
1547 || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1549 cr->duty = DUTY_PME;
1552 /* Split the sim communicator into PP and PME only nodes */
1553 MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1554 dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1555 #else
1556 GMX_UNUSED_VALUE(ddCellIndex);
1557 #endif
1559 else
1561 switch (ddRankOrder)
1563 case DdRankOrder::pp_pme:
1564 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1565 break;
1566 case DdRankOrder::interleave:
1567 /* Interleave the PP-only and PME-only ranks */
1568 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1569 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1570 break;
1571 case DdRankOrder::cartesian: break;
1572 default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1575 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1577 cr->duty = DUTY_PME;
1579 else
1581 cr->duty = DUTY_PP;
1583 #if GMX_MPI
1584 /* Split the sim communicator into PP and PME only nodes */
1585 MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1586 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1587 #endif
1590 GMX_LOG(mdlog.info)
1591 .appendTextFormatted("This rank does only %s work.\n",
1592 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1594 return cartSetup;
1597 /*! \brief Makes the PP communicator and the PME communicator, when needed
1599 * Returns the Cartesian rank setup.
1600 * Sets \p cr->mpi_comm_mygroup
1601 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1602 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1604 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1605 const DDSettings& ddSettings,
1606 const DdRankOrder ddRankOrder,
1607 const DDRankSetup& ddRankSetup,
1608 t_commrec* cr,
1609 ivec ddCellIndex,
1610 std::vector<int>* pmeRanks)
1612 CartesianRankSetup cartSetup;
1614 if (ddRankSetup.usePmeOnlyRanks)
1616 /* Split the communicator into a PP and PME part */
1617 cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1618 ddRankSetup, ddCellIndex, pmeRanks);
1620 else
1622 /* All nodes do PP and PME */
1623 /* We do not require separate communicators */
1624 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1626 cartSetup.bCartesianPP = false;
1627 cartSetup.bCartesianPP_PME = false;
1630 return cartSetup;
1633 /*! \brief For PP ranks, sets or makes the communicator
1635 * For PME ranks get the rank id.
1636 * For PP only ranks, sets the PME-only rank.
1638 static void setupGroupCommunication(const gmx::MDLogger& mdlog,
1639 const DDSettings& ddSettings,
1640 gmx::ArrayRef<const int> pmeRanks,
1641 t_commrec* cr,
1642 const int numAtomsInSystem,
1643 gmx_domdec_t* dd)
1645 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1646 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1648 if (thisRankHasDuty(cr, DUTY_PP))
1650 /* Copy or make a new PP communicator */
1652 /* We (possibly) reordered the nodes in split_communicator,
1653 * so it is no longer required in make_pp_communicator.
1655 const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1657 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1659 else
1661 receive_ddindex2simnodeid(dd, cr);
1664 if (!thisRankHasDuty(cr, DUTY_PME))
 1666         /* Set up the communication to our PME node */
1667 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1668 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1669 if (debug)
1671 fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1672 gmx::boolToString(dd->pme_receive_vir_ener));
1675 else
1677 dd->pme_nodeid = -1;
1680 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1681 if (MASTER(cr))
1683 dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1687 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1689 real * slb_frac, tot;
1690 int i, n;
1691 double dbl;
1693 slb_frac = nullptr;
1694 if (nc > 1 && size_string != nullptr)
1696 GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1697 snew(slb_frac, nc);
1698 tot = 0;
1699 for (i = 0; i < nc; i++)
1701 dbl = 0;
1702 sscanf(size_string, "%20lf%n", &dbl, &n);
1703 if (dbl == 0)
1705 gmx_fatal(FARGS,
1706 "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1707 dir, size_string);
1709 slb_frac[i] = dbl;
1710 size_string += n;
1711 tot += slb_frac[i];
1713 /* Normalize */
1714 std::string relativeCellSizes = "Relative cell sizes:";
1715 for (i = 0; i < nc; i++)
1717 slb_frac[i] /= tot;
1718 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1720 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1723 return slb_frac;
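/* Example (hypothetical input): dir = "x", nc = 4 and size_string = "1 1 2 1"
 * gives tot = 5 and thus relative cell sizes 0.200 0.200 0.400 0.200.
 */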
1726 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1728 int n = 0;
1729 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1730 int nmol;
1731 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1733 for (auto& ilist : extractILists(*ilists, IF_BOND))
1735 if (NRAL(ilist.functionType) > 2)
1737 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1742 return n;
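/* Example: a harmonic bond (2 atoms) is not counted, while an angle (3 atoms)
 * or a proper dihedral (4 atoms) is, provided the function type carries the
 * IF_BOND flag that extractILists() filters on above.
 */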
1745 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1747 char* val;
1748 int nst;
1750 nst = def;
1751 val = getenv(env_var);
1752 if (val)
1754 if (sscanf(val, "%20d", &nst) <= 0)
1756 nst = 1;
1758 GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1761 return nst;
1764 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1766 if (ir->pbcType == PbcType::Screw
1767 && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1769 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1770 c_pbcTypeNames[ir->pbcType].c_str());
1773 if (ir->nstlist == 0)
1775 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1778 if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1780 GMX_LOG(mdlog.warning)
1781 .appendText(
1782 "comm-mode angular will give incorrect results when the comm group "
1783 "partially crosses a periodic boundary");
1787 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1789 real r = ddbox.box_size[XX];
1790 for (int d = 0; d < DIM; d++)
1792 if (numDomains[d] > 1)
1794 /* Check using the initial average cell size */
1795 r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1799 return r;
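/* Worked example (hypothetical box): box_size = { 6, 6, 9 } nm with all
 * skew_fac = 1 and numDomains = { 3, 2, 1 } gives candidate sizes 2 nm (x)
 * and 3 nm (y); z is not decomposed, so the function returns 2 nm.
 */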
1802 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1804 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1805 const std::string& reasonStr,
1806 const gmx::MDLogger& mdlog)
1808 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1809 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1811 if (cmdlineDlbState == DlbState::onUser)
1813 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1815 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1817 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1819 return DlbState::offForever;
1822 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
 1824  * This function parses the "-dlb" command-line option and sets the
 1825  * corresponding state value. Then it checks the consistency of the determined
1826 * state with other run parameters and settings. As a result, the initial state
1827 * may be altered or an error may be thrown if incompatibility of options is detected.
1829 * \param [in] mdlog Logger.
1830 * \param [in] dlbOption Enum value for the DLB option.
1831 * \param [in] bRecordLoad True if the load balancer is recording load information.
1832 * \param [in] mdrunOptions Options for mdrun.
 1833  * \param [in] ir            Pointer to the mdrun input parameters.
1834 * \returns DLB initial/startup state.
1836 static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog,
1837 DlbOption dlbOption,
1838 gmx_bool bRecordLoad,
1839 const gmx::MdrunOptions& mdrunOptions,
1840 const t_inputrec* ir)
1842 DlbState dlbState = DlbState::offCanTurnOn;
1844 switch (dlbOption)
1846 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1847 case DlbOption::no: dlbState = DlbState::offUser; break;
1848 case DlbOption::yes: dlbState = DlbState::onUser; break;
1849 default: gmx_incons("Invalid dlbOption enum value");
1852 /* Reruns don't support DLB: bail or override auto mode */
1853 if (mdrunOptions.rerun)
1855 std::string reasonStr = "it is not supported in reruns.";
1856 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1859 /* Unsupported integrators */
1860 if (!EI_DYNAMICS(ir->eI))
1862 auto reasonStr = gmx::formatString(
1863 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1864 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1867 /* Without cycle counters we can't time work to balance on */
1868 if (!bRecordLoad)
1870 std::string reasonStr =
1871 "cycle counters unsupported or not enabled in the operating system kernel.";
1872 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1875 if (mdrunOptions.reproducible)
1877 std::string reasonStr = "you started a reproducible run.";
1878 switch (dlbState)
1880 case DlbState::offUser: break;
1881 case DlbState::offForever:
1882 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1883 break;
1884 case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1885 case DlbState::onCanTurnOff:
 1886                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1887 break;
1888 case DlbState::onUser:
1889 return forceDlbOffOrBail(
1890 dlbState,
1891 reasonStr
1892 + " In load balanced runs binary reproducibility cannot be "
1893 "ensured.",
1894 mdlog);
1895 default:
1896 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1897 static_cast<int>(dlbState));
1901 return dlbState;
1904 static gmx_domdec_comm_t* init_dd_comm()
1906 gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1908 comm->n_load_have = 0;
1909 comm->n_load_collect = 0;
1911 comm->haveTurnedOffDlb = false;
1913 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1915 comm->sum_nat[i] = 0;
1917 comm->ndecomp = 0;
1918 comm->nload = 0;
1919 comm->load_step = 0;
1920 comm->load_sum = 0;
1921 comm->load_max = 0;
1922 clear_ivec(comm->load_lim);
1923 comm->load_mdf = 0;
1924 comm->load_pme = 0;
1926 /* This should be replaced by a unique pointer */
1927 comm->balanceRegion = ddBalanceRegionAllocate();
1929 return comm;
1932 /* Returns whether mtop contains constraints and/or vsites */
1933 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1935 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1936 int nmol;
1937 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1939 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1941 return true;
1945 return false;
1948 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1949 const gmx_mtop_t& mtop,
1950 const t_inputrec& inputrec,
1951 const real cutoffMargin,
1952 DDSystemInfo* systemInfo)
1954 /* When we have constraints and/or vsites, it is beneficial to use
1955 * update groups (when possible) to allow independent update of groups.
1957 if (!systemHasConstraintsOrVsites(mtop))
1959 /* No constraints or vsites, atoms can be updated independently */
1960 return;
1963 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1964 systemInfo->useUpdateGroups = (!systemInfo->updateGroupingPerMoleculetype.empty()
1965 && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1967 if (systemInfo->useUpdateGroups)
1969 int numUpdateGroups = 0;
1970 for (const auto& molblock : mtop.molblock)
1972 numUpdateGroups += molblock.nmol
1973 * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1976 systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1977 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
1979 /* To use update groups, the large domain-to-domain cutoff distance
1980 * should be compatible with the box size.
1982 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1984 if (systemInfo->useUpdateGroups)
1986 GMX_LOG(mdlog.info)
1987 .appendTextFormatted(
1988 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1989 "nm\n",
1990 numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
1991 systemInfo->maxUpdateGroupRadius);
1993 else
1995 GMX_LOG(mdlog.info)
1996 .appendTextFormatted(
1997 "The combination of rlist and box size prohibits the use of update "
1998 "groups\n");
1999 systemInfo->updateGroupingPerMoleculetype.clear();
2004 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2005 npbcdim(numPbcDimensions(ir.pbcType)),
2006 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2007 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2008 haveScrewPBC(ir.pbcType == PbcType::Screw)
2012 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2013 static bool moleculesAreAlwaysWhole(const gmx_mtop_t& mtop,
2014 const bool useUpdateGroups,
2015 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2017 if (useUpdateGroups)
2019 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2020 "Need one grouping per moltype");
2021 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2023 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2025 return false;
2029 else
2031 for (const auto& moltype : mtop.moltype)
2033 if (moltype.atoms.nr > 1)
2035 return false;
2040 return true;
2043 /*! \brief Generate the simulation system information */
2044 static DDSystemInfo getSystemInfo(const gmx::MDLogger& mdlog,
2045 const t_commrec* cr,
2046 const DomdecOptions& options,
2047 const gmx_mtop_t& mtop,
2048 const t_inputrec& ir,
2049 const matrix box,
2050 gmx::ArrayRef<const gmx::RVec> xGlobal)
2052 const real tenPercentMargin = 1.1;
2054 DDSystemInfo systemInfo;
2056 /* We need to decide on update groups early, as this affects communication distances */
2057 systemInfo.useUpdateGroups = false;
2058 if (ir.cutoff_scheme == ecutsVERLET)
2060 real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2061 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2064 systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2065 mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2066 systemInfo.haveInterDomainBondeds =
2067 (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2068 systemInfo.haveInterDomainMultiBodyBondeds =
2069 (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2071 if (systemInfo.useUpdateGroups)
2073 systemInfo.haveSplitConstraints = false;
2074 systemInfo.haveSplitSettles = false;
2076 else
2078 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2079 || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2080 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2083 if (ir.rlist == 0)
2085 /* Set the cut-off to some very large value,
2086 * so we don't need if statements everywhere in the code.
2087 * We use sqrt, since the cut-off is squared in some places.
2089 systemInfo.cutoff = GMX_CUTOFF_INF;
2091 else
2093 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2095 systemInfo.minCutoffForMultiBody = 0;
2097 /* Determine the minimum cell size limit, affected by many factors */
2098 systemInfo.cellsizeLimit = 0;
2099 systemInfo.filterBondedCommunication = false;
2101 /* We do not allow home atoms to move beyond the neighboring domain
2102 * between domain decomposition steps, which limits the cell size.
2103 * Get an estimate of the cell size limit due to atom displacement.
2104 * In most cases this is a large overestimate, because it assumes
2105 * non-interacting atoms.
2106 * We set the chance to 1 in a trillion steps.
2108 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2109 const real limitForAtomDisplacement = minCellSizeForAtomDisplacement(
2110 mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2111 GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2113 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
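/* The bonded-interaction and constraint limits determined below are combined
 * into cellsizeLimit with std::max in the same way, so the final limit is the
 * largest of the three contributions.
 */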
2115 /* TODO: PME decomposition currently requires atoms not to be more than
2116 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2117 * In nearly all cases, limitForAtomDisplacement will be smaller
2118 * than 2/3*rlist, so the PME requirement is satisfied.
2119 * But it would be better for both correctness and performance
2120 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2121 * Note that we would need to improve the pairlist buffer case.
2124 if (systemInfo.haveInterDomainBondeds)
2126 if (options.minimumCommunicationRange > 0)
2128 systemInfo.minCutoffForMultiBody =
2129 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
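/* When bonded communication is in use, filtering is only activated if the
 * requested range exceeds the plain communication cut-off; otherwise the
 * cut-off itself is simply enlarged to cover the requested range.
 */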
2130 if (options.useBondedCommunication)
2132 systemInfo.filterBondedCommunication =
2133 (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2135 else
2137 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2140 else if (ir.bPeriodicMols)
2142 /* Cannot easily determine the required cut-off */
2143 GMX_LOG(mdlog.warning)
2144 .appendText(
2145 "NOTE: Periodic molecules are present in this system. Because of this, "
2146 "the domain decomposition algorithm cannot easily determine the "
2147 "minimum cell size that it requires for treating bonded interactions. "
2148 "Instead, domain decomposition will assume that half the non-bonded "
2149 "cut-off will be a suitable lower bound.");
2150 systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2152 else
2154 real r_2b, r_mb;
2156 if (MASTER(cr))
2158 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2159 options.checkBondedInteractions, &r_2b, &r_mb);
2161 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2162 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2164 /* We use an initial margin of 10% for the minimum cell size,
2165 * except when we are just below the non-bonded cut-off.
2167 if (options.useBondedCommunication)
2169 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2171 const real r_bonded = std::max(r_2b, r_mb);
2172 systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2173 /* This is the (only) place where we turn on the filtering */
2174 systemInfo.filterBondedCommunication = true;
2176 else
2178 const real r_bonded = r_mb;
2179 systemInfo.minCutoffForMultiBody =
2180 std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2182 /* We determine cutoff_mbody later */
2183 systemInfo.increaseMultiBodyCutoff = true;
2185 else
2187 /* No special bonded communication,
2188 * simply increase the DD cut-off.
2190 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2191 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
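/* Illustrative numbers only: with r_2b = 0.9 nm, r_mb = 1.1 nm and a 1.2 nm
 * cut-off, the 10% margin gives minCutoffForMultiBody = 1.21 nm and the DD
 * cut-off above is enlarged to 1.21 nm as well.
 */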
2194 GMX_LOG(mdlog.info)
2195 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2196 systemInfo.minCutoffForMultiBody);
2198 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2201 systemInfo.constraintCommunicationRange = 0;
2202 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2204 /* There is a cell size limit due to the constraints (P-LINCS) */
2205 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2206 GMX_LOG(mdlog.info)
2207 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2208 systemInfo.constraintCommunicationRange);
2209 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2211 GMX_LOG(mdlog.info)
2212 .appendText(
2213 "This distance will limit the DD cell size, you can override this with "
2214 "-rcon");
2217 else if (options.constraintCommunicationRange > 0)
2219 /* Here we do not check for dd->splitConstraints,
2220 * because one can also set a cell size limit for virtual sites only
2221 * and at this point we don't know yet if there are intercg v-sites.
2223 GMX_LOG(mdlog.info)
2224 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2225 options.constraintCommunicationRange);
2226 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2228 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2230 return systemInfo;
2233 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2234 * implemented. */
2235 static void checkDDGridSetup(const DDGridSetup& ddGridSetup,
2236 const t_commrec* cr,
2237 const DomdecOptions& options,
2238 const DDSettings& ddSettings,
2239 const DDSystemInfo& systemInfo,
2240 const real cellsizeLimit,
2241 const gmx_ddbox_t& ddbox)
2243 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2245 char buf[STRLEN];
2246 gmx_bool bC = (systemInfo.haveSplitConstraints
2247 && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2248 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2249 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2250 bC ? " or your LINCS settings" : "");
2252 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2253 "There is no domain decomposition for %d ranks that is compatible "
2254 "with the given box and a minimum cell size of %g nm\n"
2255 "%s\n"
2256 "Look in the log file for details on the domain decomposition",
2257 cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2260 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2261 if (acs < cellsizeLimit)
2263 if (options.numCells[XX] <= 0)
2265 GMX_RELEASE_ASSERT(
2266 false,
2267 "dd_choose_grid() should return a grid that satisfies the cell size limits");
2269 else
2271 gmx_fatal_collective(
2272 FARGS, cr->mpi_comm_mysim, MASTER(cr),
2273 "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2274 "options -dd, -rdd or -rcon, see the log file for details",
2275 acs, cellsizeLimit);
2279 const int numPPRanks =
2280 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2281 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2283 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2284 "The size of the domain decomposition grid (%d) does not match the "
2285 "number of PP ranks (%d). The total number of ranks is %d",
2286 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2288 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2290 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2291 "The number of separate PME ranks (%d) is larger than the number of "
2292 "PP ranks (%d), this is not supported.",
2293 ddGridSetup.numPmeOnlyRanks, numPPRanks);
2297 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2298 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2299 t_commrec* cr,
2300 const DDGridSetup& ddGridSetup,
2301 const t_inputrec& ir)
2303 GMX_LOG(mdlog.info)
2304 .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2305 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2306 ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2308 DDRankSetup ddRankSetup;
2310 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2311 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2313 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2314 if (ddRankSetup.usePmeOnlyRanks)
2316 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2318 else
2320 ddRankSetup.numRanksDoingPme =
2321 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2324 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2326 /* The following choices should match those
2327 * in comm_cost_est in domdec_setup.cpp.
2328 * Note that here the checks have to take into account
2329 * that the decomposition might occur in a different order than xyz
2330 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2331 * in which case they will not match those in comm_cost_est,
2332 * but since that is mainly for testing purposes that's fine.
2334 if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2335 && ddGridSetup.ddDimensions[1] == YY
2336 && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2337 && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2338 && getenv("GMX_PMEONEDD") == nullptr)
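/* Illustrative numbers only: a 4x3x2 domain grid with 8 ranks doing PME work
 * that decomposes along x and y gives npmenodes_x = 4 and npmenodes_y = 2.
 */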
2340 ddRankSetup.npmedecompdim = 2;
2341 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2342 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2344 else
2346 /* In case nc is 1 in both x and y we could still choose to
2347 * decompose pme in y instead of x, but we use x for simplicity.
2349 ddRankSetup.npmedecompdim = 1;
2350 if (ddGridSetup.ddDimensions[0] == YY)
2352 ddRankSetup.npmenodes_x = 1;
2353 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2355 else
2357 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2358 ddRankSetup.npmenodes_y = 1;
2361 GMX_LOG(mdlog.info)
2362 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2363 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2365 else
2367 ddRankSetup.npmedecompdim = 0;
2368 ddRankSetup.npmenodes_x = 0;
2369 ddRankSetup.npmenodes_y = 0;
2372 return ddRankSetup;
2375 /*! \brief Set the cell size and interaction limits */
2376 static void set_dd_limits(const gmx::MDLogger& mdlog,
2377 t_commrec* cr,
2378 gmx_domdec_t* dd,
2379 const DomdecOptions& options,
2380 const DDSettings& ddSettings,
2381 const DDSystemInfo& systemInfo,
2382 const DDGridSetup& ddGridSetup,
2383 const int numPPRanks,
2384 const gmx_mtop_t* mtop,
2385 const t_inputrec* ir,
2386 const gmx_ddbox_t& ddbox)
2388 gmx_domdec_comm_t* comm = dd->comm;
2389 comm->ddSettings = ddSettings;
2391 /* Initialize the GPU share count to 0; it might change later */
2392 comm->nrank_gpu_shared = 0;
2394 comm->dlbState = comm->ddSettings.initialDlbState;
2395 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2396 /* To consider turning DLB on after 2*nstlist steps we need to check
2397 * at partitioning count 3. Thus we need to increase the first count by 2.
2399 comm->ddPartioningCountFirstDlbOff += 2;
2401 comm->bPMELoadBalDLBLimits = FALSE;
2403 /* Allocate the charge group/atom sorting struct */
2404 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2406 comm->systemInfo = systemInfo;
2408 if (systemInfo.useUpdateGroups)
2410 /* Note: We would like to use dd->nnodes for the atom count estimate,
2411 * but that is not yet available here. This only affects
2412 * performance up to the second dd_partition_system call.
2414 const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2415 comm->updateGroupsCog = std::make_unique<gmx::UpdateGroupsCog>(
2416 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2417 homeAtomCountEstimate);
2420 /* Set the DD setup given by ddGridSetup */
2421 copy_ivec(ddGridSetup.numDomains, dd->numCells);
2422 dd->ndim = ddGridSetup.numDDDimensions;
2423 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2425 dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2427 snew(comm->slb_frac, DIM);
2428 if (isDlbDisabled(comm))
2430 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2431 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2432 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2435 /* Set the multi-body cut-off and cellsize limit for DLB */
2436 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2437 comm->cellsize_limit = systemInfo.cellsizeLimit;
2438 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2440 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2442 /* Set the bonded communication distance to halfway between
2443 * the minimum and the maximum,
2444 * since the extra communication cost is nearly zero.
2446 real acs = average_cellsize_min(ddbox, dd->numCells);
2447 comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2448 if (!isDlbDisabled(comm))
2450 /* Check if this does not limit the scaling */
2451 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2453 if (!systemInfo.filterBondedCommunication)
2455 /* Without bBondComm, do not go beyond the non-bonded cut-off */
2456 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2457 if (comm->cellsize_limit >= systemInfo.cutoff)
2459 /* We don't lose a lot of efficiency
2460 * when increasing it to the non-bonded cut-off.
2461 * It can even be slightly faster, because we need
2462 * fewer checks for the communication setup.
2464 comm->cutoff_mbody = systemInfo.cutoff;
2467 /* Check if we did not end up below our original limit */
2468 comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2470 if (comm->cutoff_mbody > comm->cellsize_limit)
2472 comm->cellsize_limit = comm->cutoff_mbody;
2475 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2478 if (debug)
2480 fprintf(debug,
2481 "Bonded atom communication beyond the cut-off: %s\n"
2482 "cellsize limit %f\n",
2483 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2486 if (MASTER(cr))
2488 check_dd_restrictions(dd, ir, mdlog);
2492 void dd_init_bondeds(FILE* fplog,
2493 gmx_domdec_t* dd,
2494 const gmx_mtop_t& mtop,
2495 const gmx_vsite_t* vsite,
2496 const t_inputrec* ir,
2497 gmx_bool bBCheck,
2498 gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
2500 gmx_domdec_comm_t* comm;
2502 dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2504 comm = dd->comm;
2506 if (comm->systemInfo.filterBondedCommunication)
2508 /* Communicate atoms beyond the cut-off for bonded interactions */
2509 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2511 else
2513 /* Only communicate atoms based on cut-off */
2514 comm->bondedLinks = nullptr;
2518 static void writeSettings(gmx::TextWriter* log,
2519 gmx_domdec_t* dd,
2520 const gmx_mtop_t* mtop,
2521 const t_inputrec* ir,
2522 gmx_bool bDynLoadBal,
2523 real dlb_scale,
2524 const gmx_ddbox_t* ddbox)
2526 gmx_domdec_comm_t* comm;
2527 int d;
2528 ivec np;
2529 real limit, shrink;
2531 comm = dd->comm;
2533 if (bDynLoadBal)
2535 log->writeString("The maximum number of communication pulses is:");
2536 for (d = 0; d < dd->ndim; d++)
2538 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2540 log->ensureLineBreak();
2541 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2542 comm->cellsize_limit);
2543 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2544 log->writeString("The allowed shrink of domain decomposition cells is:");
2545 for (d = 0; d < DIM; d++)
2547 if (dd->numCells[d] > 1)
2549 if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2551 shrink = 0;
2553 else
2555 shrink = comm->cellsize_min_dlb[d]
2556 / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2558 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2561 log->ensureLineBreak();
2563 else
2565 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2566 log->writeString("The initial number of communication pulses is:");
2567 for (d = 0; d < dd->ndim; d++)
2569 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2571 log->ensureLineBreak();
2572 log->writeString("The initial domain decomposition cell size is:");
2573 for (d = 0; d < DIM; d++)
2575 if (dd->numCells[d] > 1)
2577 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2580 log->ensureLineBreak();
2581 log->writeLine();
2584 const bool haveInterDomainVsites =
2585 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2587 if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2588 || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2590 std::string decompUnits;
2591 if (comm->systemInfo.useUpdateGroups)
2593 decompUnits = "atom groups";
2595 else
2597 decompUnits = "atoms";
2600 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2601 decompUnits.c_str());
2602 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "",
2603 comm->systemInfo.cutoff);
2605 if (bDynLoadBal)
2607 limit = dd->comm->cellsize_limit;
2609 else
2611 if (dd->unitCellInfo.ddBoxIsDynamic)
2613 log->writeLine(
2614 "(the following are initial values, they could change due to box "
2615 "deformation)");
2617 limit = dd->comm->cellsize_min[XX];
2618 for (d = 1; d < DIM; d++)
2620 limit = std::min(limit, dd->comm->cellsize_min[d]);
2624 if (comm->systemInfo.haveInterDomainBondeds)
2626 log->writeLineFormatted("%40s %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2627 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2628 log->writeLineFormatted("%40s %-7s %6.3f nm", "multi-body bonded interactions",
2629 "(-rdd)",
2630 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2631 ? comm->cutoff_mbody
2632 : std::min(comm->systemInfo.cutoff, limit));
2634 if (haveInterDomainVsites)
2636 log->writeLineFormatted("%40s %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2638 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2640 std::string separation =
2641 gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2642 log->writeLineFormatted("%40s %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2644 log->ensureLineBreak();
2648 static void logSettings(const gmx::MDLogger& mdlog,
2649 gmx_domdec_t* dd,
2650 const gmx_mtop_t* mtop,
2651 const t_inputrec* ir,
2652 real dlb_scale,
2653 const gmx_ddbox_t* ddbox)
2655 gmx::StringOutputStream stream;
2656 gmx::TextWriter log(&stream);
2657 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2658 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2661 log.ensureEmptyLine();
2662 log.writeLine(
2663 "When dynamic load balancing gets turned on, these settings will change to:");
2665 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2667 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2670 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2671 gmx_domdec_t* dd,
2672 real dlb_scale,
2673 const t_inputrec* ir,
2674 const gmx_ddbox_t* ddbox)
2676 gmx_domdec_comm_t* comm;
2677 int d, dim, npulse, npulse_d_max, npulse_d;
2678 gmx_bool bNoCutOff;
2680 comm = dd->comm;
2682 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2684 /* Determine the maximum number of comm. pulses in one dimension */
2686 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2688 /* Determine the maximum required number of grid pulses */
2689 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2691 /* Only a single pulse is required */
2692 npulse = 1;
2694 else if (!bNoCutOff && comm->cellsize_limit > 0)
2696 /* We round down slightly here to avoid overhead due to the latency
2697 * of extra communication calls when the cut-off
2698 * would be only slightly longer than the cell size.
2699 * Later cellsize_limit is redetermined,
2700 * so we cannot miss interactions due to this rounding.
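/* Illustrative numbers only: a 1.2 nm cut-off with a 0.5 nm cell size limit
 * gives npulse = 3, while a 1.02 nm cut-off with a 1.0 nm limit still gives a
 * single pulse because of the slight rounding down.
 */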
2702 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2704 else
2706 /* There is no cell size limit */
2707 npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2710 if (!bNoCutOff && npulse > 1)
2712 /* See if we can do with less pulses, based on dlb_scale */
2713 npulse_d_max = 0;
2714 for (d = 0; d < dd->ndim; d++)
2716 dim = dd->dim[d];
2717 npulse_d = static_cast<int>(
2719 1 + dd->numCells[dim] * comm->systemInfo.cutoff
2720 / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2721 npulse_d_max = std::max(npulse_d_max, npulse_d);
2723 npulse = std::min(npulse, npulse_d_max);
2726 /* This env var can override npulse */
2727 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2728 if (d > 0)
2730 npulse = d;
2733 comm->maxpulse = 1;
2734 comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2735 for (d = 0; d < dd->ndim; d++)
2737 if (comm->ddSettings.request1DAnd1Pulse)
2739 comm->cd[d].np_dlb = 1;
2741 else
2743 comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2744 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2746 if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2748 comm->bVacDLBNoLimit = FALSE;
2752 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2753 if (!comm->bVacDLBNoLimit)
2755 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2757 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2758 /* Set the minimum cell size for each DD dimension */
2759 for (d = 0; d < dd->ndim; d++)
2761 if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2763 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2765 else
2767 comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2770 if (comm->cutoff_mbody <= 0)
2772 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2774 if (isDlbOn(comm))
2776 set_dlb_limits(dd);
2780 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2782 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2785 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2787 /* If each molecule is a single charge group
2788 * or we use domain decomposition for each periodic dimension,
2789 * we do not need to take pbc into account for the bonded interactions.
2791 return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2792 && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2793 && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2796 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2797 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2798 gmx_domdec_t* dd,
2799 real dlb_scale,
2800 const gmx_mtop_t* mtop,
2801 const t_inputrec* ir,
2802 const gmx_ddbox_t* ddbox)
2804 gmx_domdec_comm_t* comm = dd->comm;
2805 DDRankSetup& ddRankSetup = comm->ddRankSetup;
2807 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2809 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2810 if (ddRankSetup.npmedecompdim >= 2)
2812 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2815 else
2817 ddRankSetup.numRanksDoingPme = 0;
2818 if (dd->pme_nodeid >= 0)
2820 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2821 "Can not have separate PME ranks without PME electrostatics");
2825 if (debug)
2827 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2829 if (!isDlbDisabled(comm))
2831 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2834 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2836 real vol_frac;
2837 if (ir->pbcType == PbcType::No)
2839 vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2841 else
2843 vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2844 / static_cast<double>(dd->nnodes);
2846 if (debug)
2848 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2850 int natoms_tot = mtop->natoms;
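/* vol_frac*natoms_tot serves as a size estimate for the global-to-local atom
 * lookup: roughly the number of home plus communicated atoms this rank is
 * expected to hold.
 */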
2852 dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2855 /*! \brief Get some important DD parameters which can be modified by env.vars */
2856 static DDSettings getDDSettings(const gmx::MDLogger& mdlog,
2857 const DomdecOptions& options,
2858 const gmx::MdrunOptions& mdrunOptions,
2859 const t_inputrec& ir)
2861 DDSettings ddSettings;
2863 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2864 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2865 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2866 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2867 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2868 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2869 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2870 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2871 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2872 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2874 if (ddSettings.useSendRecv2)
2876 GMX_LOG(mdlog.info)
2877 .appendText(
2878 "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2879 "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2880 "communication");
2883 if (ddSettings.eFlop)
2885 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2886 ddSettings.recordLoad = true;
2888 else
2890 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2893 ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2894 ddSettings.recordLoad, mdrunOptions, &ir);
2895 GMX_LOG(mdlog.info)
2896 .appendTextFormatted("Dynamic load balancing: %s",
2897 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2899 return ddSettings;
2902 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2904 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2906 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2907 * generally requires a larger box than other possible decompositions
2908 * with the same rank count, so the calling code might need to decide
2909 * what is the most appropriate way to run the simulation based on
2910 * whether such a DD is possible.
2912 * This function works like init_domain_decomposition(), but will not
2913 * give a fatal error, and only uses \c cr for communicating between
2914 * ranks.
2916 * It is safe to call before thread-MPI spawns ranks, so that
2917 * thread-MPI can decide whether and how to trigger the GPU halo
2918 * exchange code path. The number of PME ranks, if any, should be set
2919 * in \c options.numPmeRanks.
2921 static bool canMake1DAnd1PulseDomainDecomposition(const DDSettings& ddSettingsOriginal,
2922 const t_commrec* cr,
2923 const int numRanksRequested,
2924 const DomdecOptions& options,
2925 const gmx_mtop_t& mtop,
2926 const t_inputrec& ir,
2927 const matrix box,
2928 gmx::ArrayRef<const gmx::RVec> xGlobal)
2930 // Ensure we don't write any output from this checking routine
2931 gmx::MDLogger dummyLogger;
2933 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2935 DDSettings ddSettings = ddSettingsOriginal;
2936 ddSettings.request1DAnd1Pulse = true;
2937 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
2938 dummyLogger, ddSettings.request1DAnd1Pulse, !isDlbDisabled(ddSettings.initialDlbState),
2939 options.dlbScaling, ir, systemInfo.cellsizeLimit);
2940 gmx_ddbox_t ddbox = { 0 };
2941 DDGridSetup ddGridSetup =
2942 getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2943 gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2945 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2947 return canMakeDDWith1DAnd1Pulse;
2950 bool is1DAnd1PulseDD(const gmx_domdec_t& dd)
2952 const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
2953 const int productOfDimensionSizes = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
2954 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
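/* This equality holds only when all but one dimension have a single domain:
 * e.g. 8x1x1 is 1D (max 8 == product 8), whereas 4x2x1 is not (max 4 != product 8).
 */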
2956 const bool hasMax1Pulse =
2957 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff)
2958 || (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
2960 return decompositionHasOneDimension && hasMax1Pulse;
2963 namespace gmx
2966 // TODO once the functionality stabilizes, move this class and
2967 // supporting functionality into builder.cpp
2968 /*! \brief Impl class for DD builder */
2969 class DomainDecompositionBuilder::Impl
2971 public:
2972 //! Constructor
2973 Impl(const MDLogger& mdlog,
2974 t_commrec* cr,
2975 const DomdecOptions& options,
2976 const MdrunOptions& mdrunOptions,
2977 bool prefer1DAnd1Pulse,
2978 const gmx_mtop_t& mtop,
2979 const t_inputrec& ir,
2980 const matrix box,
2981 ArrayRef<const RVec> xGlobal);
2983 //! Build the resulting DD manager
2984 gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2986 //! Objects used in constructing and configuring DD
2987 //! {
2988 //! Logging object
2989 const MDLogger& mdlog_;
2990 //! Communication object
2991 t_commrec* cr_;
2992 //! User-supplied options configuring DD behavior
2993 const DomdecOptions options_;
2994 //! Global system topology
2995 const gmx_mtop_t& mtop_;
2996 //! User input values from the tpr file
2997 const t_inputrec& ir_;
2998 //! }
3000 //! Internal objects used in constructing DD
3001 //! {
3002 //! Settings combined from the user input
3003 DDSettings ddSettings_;
3004 //! Information derived from the simulation system
3005 DDSystemInfo systemInfo_;
3006 //! Box structure
3007 gmx_ddbox_t ddbox_ = { 0 };
3008 //! Organization of the DD grids
3009 DDGridSetup ddGridSetup_;
3010 //! Organization of the DD ranks
3011 DDRankSetup ddRankSetup_;
3012 //! Cartesian index of this rank's DD cell
3013 ivec ddCellIndex_ = { 0, 0, 0 };
3014 //! IDs of PME-only ranks
3015 std::vector<int> pmeRanks_;
3016 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3017 CartesianRankSetup cartSetup_;
3018 //! }
3021 DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
3022 t_commrec* cr,
3023 const DomdecOptions& options,
3024 const MdrunOptions& mdrunOptions,
3025 const bool prefer1DAnd1Pulse,
3026 const gmx_mtop_t& mtop,
3027 const t_inputrec& ir,
3028 const matrix box,
3029 ArrayRef<const RVec> xGlobal) :
3030 mdlog_(mdlog),
3031 cr_(cr),
3032 options_(options),
3033 mtop_(mtop),
3034 ir_(ir)
3036 GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3038 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3040 if (prefer1DAnd1Pulse
3041 && canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_,
3042 ir_, box, xGlobal))
3044 ddSettings_.request1DAnd1Pulse = true;
3047 if (ddSettings_.eFlop > 1)
3049 /* Ensure that we have different random flop counts on different ranks */
3050 srand(1 + cr_->nodeid);
3053 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3055 const int numRanksRequested = cr_->nnodes;
3056 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3058 // DD grid setup uses a different cell size limit for
3059 // automated setup than the one in systemInfo_. The latter is used
3060 // in set_dd_limits() to configure DLB, for example.
3061 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
3062 mdlog_, ddSettings_.request1DAnd1Pulse, !isDlbDisabled(ddSettings_.initialDlbState),
3063 options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3064 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3065 gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3066 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3068 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3070 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3072 /* Generate the group communicator, also decides the duty of each rank */
3073 cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3074 ddCellIndex_, &pmeRanks_);
3077 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3079 gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3081 copy_ivec(ddCellIndex_, dd->ci);
3083 dd->comm = init_dd_comm();
3085 dd->comm->ddRankSetup = ddRankSetup_;
3086 dd->comm->cartesianRankSetup = cartSetup_;
3088 set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3089 ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3091 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3093 if (thisRankHasDuty(cr_, DUTY_PP))
3095 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3097 setup_neighbor_relations(dd);
3100 /* Set overallocation to avoid frequent reallocation of arrays */
3101 set_over_alloc_dd(TRUE);
3103 dd->atomSets = atomSets;
3105 return dd;
3108 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& mdlog,
3109 t_commrec* cr,
3110 const DomdecOptions& options,
3111 const MdrunOptions& mdrunOptions,
3112 const bool prefer1DAnd1Pulse,
3113 const gmx_mtop_t& mtop,
3114 const t_inputrec& ir,
3115 const matrix box,
3116 ArrayRef<const RVec> xGlobal) :
3117 impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3121 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3123 return impl_->build(atomSets);
3126 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3128 } // namespace gmx
3130 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3132 gmx_domdec_t* dd;
3133 gmx_ddbox_t ddbox;
3134 int d, dim, np;
3135 real inv_cell_size;
3136 int LocallyLimited;
3138 dd = cr->dd;
3140 set_ddbox(*dd, false, box, true, x, &ddbox);
3142 LocallyLimited = 0;
3144 for (d = 0; d < dd->ndim; d++)
3146 dim = dd->dim[d];
3148 inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3149 if (dd->unitCellInfo.ddBoxIsDynamic)
3151 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3154 np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
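/* np is the number of communication pulses the requested cut-off would need in
 * this dimension; illustrative numbers only: ignoring the safety margins and
 * skew, a 2.0 nm cut-off with 4 cells in a 6 nm box (1.5 nm cells) needs 2 pulses.
 */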
3156 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3158 return FALSE;
3161 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3163 if (np > dd->comm->cd[d].np_dlb)
3165 return FALSE;
3168 /* If a current local cell size is smaller than the requested
3169 * cut-off, we could still fix it, but this gets very complicated.
3170 * Without fixing here, we might actually need more checks.
3172 real cellSizeAlongDim =
3173 (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3174 if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3176 LocallyLimited = 1;
3181 if (!isDlbDisabled(dd->comm))
3183 /* If DLB is not active yet, we don't need to check the grid jumps.
3184 * Actually we shouldn't, because then the grid jump data is not set.
3186 if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3188 LocallyLimited = 1;
3191 gmx_sumi(1, &LocallyLimited, cr);
3193 if (LocallyLimited > 0)
3195 return FALSE;
3199 return TRUE;
3202 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3204 gmx_bool bCutoffAllowed;
3206 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3208 if (bCutoffAllowed)
3210 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3213 return bCutoffAllowed;