2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5 * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7 * Copyright (c) 2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/options.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/gmxlib/network.h"
66 #include "gromacs/gmxlib/nrnb.h"
67 #include "gromacs/gpu_utils/device_stream_manager.h"
68 #include "gromacs/gpu_utils/gpu_utils.h"
69 #include "gromacs/hardware/hw_info.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/math/vectypes.h"
72 #include "gromacs/mdlib/calc_verletbuf.h"
73 #include "gromacs/mdlib/constr.h"
74 #include "gromacs/mdlib/constraintrange.h"
75 #include "gromacs/mdlib/updategroups.h"
76 #include "gromacs/mdlib/vsite.h"
77 #include "gromacs/mdtypes/commrec.h"
78 #include "gromacs/mdtypes/forceoutput.h"
79 #include "gromacs/mdtypes/inputrec.h"
80 #include "gromacs/mdtypes/mdrunoptions.h"
81 #include "gromacs/mdtypes/state.h"
82 #include "gromacs/pbcutil/ishift.h"
83 #include "gromacs/pbcutil/pbc.h"
84 #include "gromacs/pulling/pull.h"
85 #include "gromacs/timing/wallcycle.h"
86 #include "gromacs/topology/block.h"
87 #include "gromacs/topology/idef.h"
88 #include "gromacs/topology/ifunc.h"
89 #include "gromacs/topology/mtop_lookup.h"
90 #include "gromacs/topology/mtop_util.h"
91 #include "gromacs/topology/topology.h"
92 #include "gromacs/utility/basedefinitions.h"
93 #include "gromacs/utility/basenetwork.h"
94 #include "gromacs/utility/cstringutil.h"
95 #include "gromacs/utility/exceptions.h"
96 #include "gromacs/utility/fatalerror.h"
97 #include "gromacs/utility/gmxmpi.h"
98 #include "gromacs/utility/logger.h"
99 #include "gromacs/utility/real.h"
100 #include "gromacs/utility/smalloc.h"
101 #include "gromacs/utility/strconvert.h"
102 #include "gromacs/utility/stringstream.h"
103 #include "gromacs/utility/stringutil.h"
104 #include "gromacs/utility/textwriter.h"
106 #include "atomdistribution.h"
108 #include "cellsizes.h"
109 #include "distribute.h"
110 #include "domdec_constraints.h"
111 #include "domdec_internal.h"
112 #include "domdec_setup.h"
113 #include "domdec_vsite.h"
114 #include "redistribute.h"
117 // TODO remove this when moving domdec into gmx namespace
118 using gmx::DdRankOrder
;
119 using gmx::DlbOption
;
120 using gmx::DomdecOptions
;
122 static const char* edlbs_names
[int(DlbState::nr
)] = { "off", "off", "auto", "locked", "on", "on" };
124 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
128 #define DD_FLAG_NRCG 65535
129 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
130 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
132 /* The DD zone order */
133 static const ivec dd_zo
[DD_MAXZONE
] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
134 { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
136 /* The non-bonded zone-pair setup for domain decomposition
137 * The first number is the i-zone, the second number the first j-zone seen by
138 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
139 * As is, this is for 3D decomposition, where there are 4 i-zones.
140 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
141 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
143 static const int ddNonbondedZonePairRanges
[DD_MAXIZONE
][3] = { { 0, 0, 8 },
148 static void ddindex2xyz(const ivec nc
, int ind
, ivec xyz
)
150 xyz
[XX
] = ind
/ (nc
[YY
] * nc
[ZZ
]);
151 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
152 xyz
[ZZ
] = ind
% nc
[ZZ
];
155 static int ddcoord2ddnodeid(gmx_domdec_t
* dd
, ivec c
)
159 const CartesianRankSetup
& cartSetup
= dd
->comm
->cartesianRankSetup
;
160 const int ddindex
= dd_index(dd
->numCells
, c
);
161 if (cartSetup
.bCartesianPP_PME
)
163 ddnodeid
= cartSetup
.ddindex2ddnodeid
[ddindex
];
165 else if (cartSetup
.bCartesianPP
)
168 MPI_Cart_rank(dd
->mpi_comm_all
, c
, &ddnodeid
);
179 int ddglatnr(const gmx_domdec_t
* dd
, int i
)
189 if (i
>= dd
->comm
->atomRanges
.numAtomsTotal())
192 "glatnr called with %d, which is larger than the local number of atoms (%d)",
193 i
, dd
->comm
->atomRanges
.numAtomsTotal());
195 atnr
= dd
->globalAtomIndices
[i
] + 1;
201 gmx::ArrayRef
<const gmx::RangePartitioning
> getUpdateGroupingPerMoleculetype(const gmx_domdec_t
& dd
)
203 GMX_RELEASE_ASSERT(dd
.comm
, "Need a valid dd.comm");
204 return dd
.comm
->systemInfo
.updateGroupingPerMoleculetype
;
207 void dd_store_state(gmx_domdec_t
* dd
, t_state
* state
)
211 if (state
->ddp_count
!= dd
->ddp_count
)
213 gmx_incons("The MD state does not match the domain decomposition state");
216 state
->cg_gl
.resize(dd
->ncg_home
);
217 for (i
= 0; i
< dd
->ncg_home
; i
++)
219 state
->cg_gl
[i
] = dd
->globalAtomGroupIndices
[i
];
222 state
->ddp_count_cg_gl
= dd
->ddp_count
;
225 gmx_domdec_zones_t
* domdec_zones(gmx_domdec_t
* dd
)
227 return &dd
->comm
->zones
;
230 int dd_numAtomsZones(const gmx_domdec_t
& dd
)
232 return dd
.comm
->atomRanges
.end(DDAtomRanges::Type::Zones
);
235 int dd_numHomeAtoms(const gmx_domdec_t
& dd
)
237 return dd
.comm
->atomRanges
.numHomeAtoms();
240 int dd_natoms_mdatoms(const gmx_domdec_t
* dd
)
242 /* We currently set mdatoms entries for all atoms:
243 * local + non-local + communicated for vsite + constraints
246 return dd
->comm
->atomRanges
.numAtomsTotal();
249 int dd_natoms_vsite(const gmx_domdec_t
* dd
)
251 return dd
->comm
->atomRanges
.end(DDAtomRanges::Type::Vsites
);
254 void dd_get_constraint_range(const gmx_domdec_t
* dd
, int* at_start
, int* at_end
)
256 *at_start
= dd
->comm
->atomRanges
.start(DDAtomRanges::Type::Constraints
);
257 *at_end
= dd
->comm
->atomRanges
.end(DDAtomRanges::Type::Constraints
);
260 void dd_move_x(gmx_domdec_t
* dd
, const matrix box
, gmx::ArrayRef
<gmx::RVec
> x
, gmx_wallcycle
* wcycle
)
262 wallcycle_start(wcycle
, ewcMOVEX
);
265 gmx_domdec_comm_t
* comm
;
266 gmx_domdec_comm_dim_t
* cd
;
267 rvec shift
= { 0, 0, 0 };
268 gmx_bool bPBC
, bScrew
;
273 nat_tot
= comm
->atomRanges
.numHomeAtoms();
274 for (int d
= 0; d
< dd
->ndim
; d
++)
276 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
277 bScrew
= (bPBC
&& dd
->unitCellInfo
.haveScrewPBC
&& dd
->dim
[d
] == XX
);
280 copy_rvec(box
[dd
->dim
[d
]], shift
);
283 for (const gmx_domdec_ind_t
& ind
: cd
->ind
)
285 DDBufferAccess
<gmx::RVec
> sendBufferAccess(comm
->rvecBuffer
, ind
.nsend
[nzone
+ 1]);
286 gmx::ArrayRef
<gmx::RVec
>& sendBuffer
= sendBufferAccess
.buffer
;
290 for (int j
: ind
.index
)
292 sendBuffer
[n
] = x
[j
];
298 for (int j
: ind
.index
)
300 /* We need to shift the coordinates */
301 for (int d
= 0; d
< DIM
; d
++)
303 sendBuffer
[n
][d
] = x
[j
][d
] + shift
[d
];
310 for (int j
: ind
.index
)
313 sendBuffer
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
315 * This operation requires a special shift force
316 * treatment, which is performed in calc_vir.
318 sendBuffer
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
319 sendBuffer
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
324 DDBufferAccess
<gmx::RVec
> receiveBufferAccess(
325 comm
->rvecBuffer2
, cd
->receiveInPlace
? 0 : ind
.nrecv
[nzone
+ 1]);
327 gmx::ArrayRef
<gmx::RVec
> receiveBuffer
;
328 if (cd
->receiveInPlace
)
330 receiveBuffer
= gmx::arrayRefFromArray(x
.data() + nat_tot
, ind
.nrecv
[nzone
+ 1]);
334 receiveBuffer
= receiveBufferAccess
.buffer
;
336 /* Send and receive the coordinates */
337 ddSendrecv(dd
, d
, dddirBackward
, sendBuffer
, receiveBuffer
);
339 if (!cd
->receiveInPlace
)
342 for (int zone
= 0; zone
< nzone
; zone
++)
344 for (int i
= ind
.cell2at0
[zone
]; i
< ind
.cell2at1
[zone
]; i
++)
346 x
[i
] = receiveBuffer
[j
++];
350 nat_tot
+= ind
.nrecv
[nzone
+ 1];
355 wallcycle_stop(wcycle
, ewcMOVEX
);
358 void dd_move_f(gmx_domdec_t
* dd
, gmx::ForceWithShiftForces
* forceWithShiftForces
, gmx_wallcycle
* wcycle
)
360 wallcycle_start(wcycle
, ewcMOVEF
);
362 gmx::ArrayRef
<gmx::RVec
> f
= forceWithShiftForces
->force();
363 gmx::ArrayRef
<gmx::RVec
> fshift
= forceWithShiftForces
->shiftForces();
365 gmx_domdec_comm_t
& comm
= *dd
->comm
;
366 int nzone
= comm
.zones
.n
/ 2;
367 int nat_tot
= comm
.atomRanges
.end(DDAtomRanges::Type::Zones
);
368 for (int d
= dd
->ndim
- 1; d
>= 0; d
--)
370 /* Only forces in domains near the PBC boundaries need to
371 consider PBC in the treatment of fshift */
372 const bool shiftForcesNeedPbc
=
373 (forceWithShiftForces
->computeVirial() && dd
->ci
[dd
->dim
[d
]] == 0);
374 const bool applyScrewPbc
=
375 (shiftForcesNeedPbc
&& dd
->unitCellInfo
.haveScrewPBC
&& dd
->dim
[d
] == XX
);
376 /* Determine which shift vector we need */
377 ivec vis
= { 0, 0, 0 };
379 const int is
= IVEC2IS(vis
);
381 /* Loop over the pulses */
382 const gmx_domdec_comm_dim_t
& cd
= comm
.cd
[d
];
383 for (int p
= cd
.numPulses() - 1; p
>= 0; p
--)
385 const gmx_domdec_ind_t
& ind
= cd
.ind
[p
];
386 DDBufferAccess
<gmx::RVec
> receiveBufferAccess(comm
.rvecBuffer
, ind
.nsend
[nzone
+ 1]);
387 gmx::ArrayRef
<gmx::RVec
>& receiveBuffer
= receiveBufferAccess
.buffer
;
389 nat_tot
-= ind
.nrecv
[nzone
+ 1];
391 DDBufferAccess
<gmx::RVec
> sendBufferAccess(
392 comm
.rvecBuffer2
, cd
.receiveInPlace
? 0 : ind
.nrecv
[nzone
+ 1]);
394 gmx::ArrayRef
<gmx::RVec
> sendBuffer
;
395 if (cd
.receiveInPlace
)
397 sendBuffer
= gmx::arrayRefFromArray(f
.data() + nat_tot
, ind
.nrecv
[nzone
+ 1]);
401 sendBuffer
= sendBufferAccess
.buffer
;
403 for (int zone
= 0; zone
< nzone
; zone
++)
405 for (int i
= ind
.cell2at0
[zone
]; i
< ind
.cell2at1
[zone
]; i
++)
407 sendBuffer
[j
++] = f
[i
];
411 /* Communicate the forces */
412 ddSendrecv(dd
, d
, dddirForward
, sendBuffer
, receiveBuffer
);
413 /* Add the received forces */
415 if (!shiftForcesNeedPbc
)
417 for (int j
: ind
.index
)
419 for (int d
= 0; d
< DIM
; d
++)
421 f
[j
][d
] += receiveBuffer
[n
][d
];
426 else if (!applyScrewPbc
)
428 for (int j
: ind
.index
)
430 for (int d
= 0; d
< DIM
; d
++)
432 f
[j
][d
] += receiveBuffer
[n
][d
];
434 /* Add this force to the shift force */
435 for (int d
= 0; d
< DIM
; d
++)
437 fshift
[is
][d
] += receiveBuffer
[n
][d
];
444 for (int j
: ind
.index
)
446 /* Rotate the force */
447 f
[j
][XX
] += receiveBuffer
[n
][XX
];
448 f
[j
][YY
] -= receiveBuffer
[n
][YY
];
449 f
[j
][ZZ
] -= receiveBuffer
[n
][ZZ
];
450 if (shiftForcesNeedPbc
)
452 /* Add this force to the shift force */
453 for (int d
= 0; d
< DIM
; d
++)
455 fshift
[is
][d
] += receiveBuffer
[n
][d
];
464 wallcycle_stop(wcycle
, ewcMOVEF
);
467 /* Convenience function for extracting a real buffer from an rvec buffer
469 * To reduce the number of temporary communication buffers and avoid
470 * cache polution, we reuse gmx::RVec buffers for storing reals.
471 * This functions return a real buffer reference with the same number
472 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
474 static gmx::ArrayRef
<real
> realArrayRefFromRvecArrayRef(gmx::ArrayRef
<gmx::RVec
> arrayRef
)
476 return gmx::arrayRefFromArray(as_rvec_array(arrayRef
.data())[0], arrayRef
.size());
479 void dd_atom_spread_real(gmx_domdec_t
* dd
, real v
[])
482 gmx_domdec_comm_t
* comm
;
483 gmx_domdec_comm_dim_t
* cd
;
488 nat_tot
= comm
->atomRanges
.numHomeAtoms();
489 for (int d
= 0; d
< dd
->ndim
; d
++)
492 for (const gmx_domdec_ind_t
& ind
: cd
->ind
)
494 /* Note: We provision for RVec instead of real, so a factor of 3
495 * more than needed. The buffer actually already has this size
496 * and we pass a plain pointer below, so this does not matter.
498 DDBufferAccess
<gmx::RVec
> sendBufferAccess(comm
->rvecBuffer
, ind
.nsend
[nzone
+ 1]);
499 gmx::ArrayRef
<real
> sendBuffer
= realArrayRefFromRvecArrayRef(sendBufferAccess
.buffer
);
501 for (int j
: ind
.index
)
503 sendBuffer
[n
++] = v
[j
];
506 DDBufferAccess
<gmx::RVec
> receiveBufferAccess(
507 comm
->rvecBuffer2
, cd
->receiveInPlace
? 0 : ind
.nrecv
[nzone
+ 1]);
509 gmx::ArrayRef
<real
> receiveBuffer
;
510 if (cd
->receiveInPlace
)
512 receiveBuffer
= gmx::arrayRefFromArray(v
+ nat_tot
, ind
.nrecv
[nzone
+ 1]);
516 receiveBuffer
= realArrayRefFromRvecArrayRef(receiveBufferAccess
.buffer
);
518 /* Send and receive the data */
519 ddSendrecv(dd
, d
, dddirBackward
, sendBuffer
, receiveBuffer
);
520 if (!cd
->receiveInPlace
)
523 for (int zone
= 0; zone
< nzone
; zone
++)
525 for (int i
= ind
.cell2at0
[zone
]; i
< ind
.cell2at1
[zone
]; i
++)
527 v
[i
] = receiveBuffer
[j
++];
531 nat_tot
+= ind
.nrecv
[nzone
+ 1];
537 void dd_atom_sum_real(gmx_domdec_t
* dd
, real v
[])
540 gmx_domdec_comm_t
* comm
;
541 gmx_domdec_comm_dim_t
* cd
;
545 nzone
= comm
->zones
.n
/ 2;
546 nat_tot
= comm
->atomRanges
.end(DDAtomRanges::Type::Zones
);
547 for (int d
= dd
->ndim
- 1; d
>= 0; d
--)
550 for (int p
= cd
->numPulses() - 1; p
>= 0; p
--)
552 const gmx_domdec_ind_t
& ind
= cd
->ind
[p
];
554 /* Note: We provision for RVec instead of real, so a factor of 3
555 * more than needed. The buffer actually already has this size
556 * and we typecast, so this works as intended.
558 DDBufferAccess
<gmx::RVec
> receiveBufferAccess(comm
->rvecBuffer
, ind
.nsend
[nzone
+ 1]);
559 gmx::ArrayRef
<real
> receiveBuffer
= realArrayRefFromRvecArrayRef(receiveBufferAccess
.buffer
);
560 nat_tot
-= ind
.nrecv
[nzone
+ 1];
562 DDBufferAccess
<gmx::RVec
> sendBufferAccess(
563 comm
->rvecBuffer2
, cd
->receiveInPlace
? 0 : ind
.nrecv
[nzone
+ 1]);
565 gmx::ArrayRef
<real
> sendBuffer
;
566 if (cd
->receiveInPlace
)
568 sendBuffer
= gmx::arrayRefFromArray(v
+ nat_tot
, ind
.nrecv
[nzone
+ 1]);
572 sendBuffer
= realArrayRefFromRvecArrayRef(sendBufferAccess
.buffer
);
574 for (int zone
= 0; zone
< nzone
; zone
++)
576 for (int i
= ind
.cell2at0
[zone
]; i
< ind
.cell2at1
[zone
]; i
++)
578 sendBuffer
[j
++] = v
[i
];
582 /* Communicate the forces */
583 ddSendrecv(dd
, d
, dddirForward
, sendBuffer
, receiveBuffer
);
584 /* Add the received forces */
586 for (int j
: ind
.index
)
588 v
[j
] += receiveBuffer
[n
];
596 real
dd_cutoff_multibody(const gmx_domdec_t
* dd
)
598 const gmx_domdec_comm_t
& comm
= *dd
->comm
;
599 const DDSystemInfo
& systemInfo
= comm
.systemInfo
;
602 if (systemInfo
.haveInterDomainMultiBodyBondeds
)
604 if (comm
.cutoff_mbody
> 0)
606 r
= comm
.cutoff_mbody
;
610 /* cutoff_mbody=0 means we do not have DLB */
611 r
= comm
.cellsize_min
[dd
->dim
[0]];
612 for (int di
= 1; di
< dd
->ndim
; di
++)
614 r
= std::min(r
, comm
.cellsize_min
[dd
->dim
[di
]]);
616 if (comm
.systemInfo
.filterBondedCommunication
)
618 r
= std::max(r
, comm
.cutoff_mbody
);
622 r
= std::min(r
, systemInfo
.cutoff
);
630 real
dd_cutoff_twobody(const gmx_domdec_t
* dd
)
634 r_mb
= dd_cutoff_multibody(dd
);
636 return std::max(dd
->comm
->systemInfo
.cutoff
, r_mb
);
640 static void dd_cart_coord2pmecoord(const DDRankSetup
& ddRankSetup
,
641 const CartesianRankSetup
& cartSetup
,
645 const int nc
= ddRankSetup
.numPPCells
[cartSetup
.cartpmedim
];
646 const int ntot
= cartSetup
.ntot
[cartSetup
.cartpmedim
];
647 copy_ivec(coord
, coord_pme
);
648 coord_pme
[cartSetup
.cartpmedim
] =
649 nc
+ (coord
[cartSetup
.cartpmedim
] * (ntot
- nc
) + (ntot
- nc
) / 2) / nc
;
652 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
653 static int ddindex2pmeindex(const DDRankSetup
& ddRankSetup
, const int ddCellIndex
)
655 const int npp
= ddRankSetup
.numPPRanks
;
656 const int npme
= ddRankSetup
.numRanksDoingPme
;
658 /* Here we assign a PME node to communicate with this DD node
659 * by assuming that the major index of both is x.
660 * We add npme/2 to obtain an even distribution.
662 return (ddCellIndex
* npme
+ npme
/ 2) / npp
;
665 static std::vector
<int> dd_interleaved_pme_ranks(const DDRankSetup
& ddRankSetup
)
667 std::vector
<int> pmeRanks(ddRankSetup
.numRanksDoingPme
);
670 for (int i
= 0; i
< ddRankSetup
.numPPRanks
; i
++)
672 const int p0
= ddindex2pmeindex(ddRankSetup
, i
);
673 const int p1
= ddindex2pmeindex(ddRankSetup
, i
+ 1);
674 if (i
+ 1 == ddRankSetup
.numPPRanks
|| p1
> p0
)
678 fprintf(debug
, "pme_rank[%d] = %d\n", n
, i
+ 1 + n
);
680 pmeRanks
[n
] = i
+ 1 + n
;
688 static int gmx_ddcoord2pmeindex(const t_commrec
* cr
, int x
, int y
, int z
)
698 slab
= ddindex2pmeindex(dd
->comm
->ddRankSetup
, dd_index(dd
->numCells
, coords
));
703 static int ddcoord2simnodeid(const t_commrec
* cr
, int x
, int y
, int z
)
705 const CartesianRankSetup
& cartSetup
= cr
->dd
->comm
->cartesianRankSetup
;
706 ivec coords
= { x
, y
, z
};
709 if (cartSetup
.bCartesianPP_PME
)
712 MPI_Cart_rank(cr
->mpi_comm_mysim
, coords
, &nodeid
);
717 int ddindex
= dd_index(cr
->dd
->numCells
, coords
);
718 if (cartSetup
.bCartesianPP
)
720 nodeid
= cartSetup
.ddindex2simnodeid
[ddindex
];
724 if (cr
->dd
->comm
->ddRankSetup
.usePmeOnlyRanks
)
726 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
, x
, y
, z
);
738 static int dd_simnode2pmenode(const DDRankSetup
& ddRankSetup
,
739 const CartesianRankSetup
& cartSetup
,
740 gmx::ArrayRef
<const int> pmeRanks
,
741 const t_commrec gmx_unused
* cr
,
742 const int sim_nodeid
)
746 /* This assumes a uniform x domain decomposition grid cell size */
747 if (cartSetup
.bCartesianPP_PME
)
750 ivec coord
, coord_pme
;
751 MPI_Cart_coords(cr
->mpi_comm_mysim
, sim_nodeid
, DIM
, coord
);
752 if (coord
[cartSetup
.cartpmedim
] < ddRankSetup
.numPPCells
[cartSetup
.cartpmedim
])
754 /* This is a PP rank */
755 dd_cart_coord2pmecoord(ddRankSetup
, cartSetup
, coord
, coord_pme
);
756 MPI_Cart_rank(cr
->mpi_comm_mysim
, coord_pme
, &pmenode
);
760 else if (cartSetup
.bCartesianPP
)
762 if (sim_nodeid
< ddRankSetup
.numPPRanks
)
764 pmenode
= ddRankSetup
.numPPRanks
+ ddindex2pmeindex(ddRankSetup
, sim_nodeid
);
769 /* This assumes DD cells with identical x coordinates
770 * are numbered sequentially.
772 if (pmeRanks
.empty())
774 if (sim_nodeid
< ddRankSetup
.numPPRanks
)
776 /* The DD index equals the nodeid */
777 pmenode
= ddRankSetup
.numPPRanks
+ ddindex2pmeindex(ddRankSetup
, sim_nodeid
);
783 while (sim_nodeid
> pmeRanks
[i
])
787 if (sim_nodeid
< pmeRanks
[i
])
789 pmenode
= pmeRanks
[i
];
797 NumPmeDomains
getNumPmeDomains(const gmx_domdec_t
* dd
)
801 return { dd
->comm
->ddRankSetup
.npmenodes_x
, dd
->comm
->ddRankSetup
.npmenodes_y
};
809 std::vector
<int> get_pme_ddranks(const t_commrec
* cr
, const int pmenodeid
)
811 const DDRankSetup
& ddRankSetup
= cr
->dd
->comm
->ddRankSetup
;
812 const CartesianRankSetup
& cartSetup
= cr
->dd
->comm
->cartesianRankSetup
;
813 GMX_RELEASE_ASSERT(ddRankSetup
.usePmeOnlyRanks
,
814 "This function should only be called when PME-only ranks are in use");
815 std::vector
<int> ddranks
;
816 ddranks
.reserve((ddRankSetup
.numPPRanks
+ ddRankSetup
.numRanksDoingPme
- 1) / ddRankSetup
.numRanksDoingPme
);
818 for (int x
= 0; x
< ddRankSetup
.numPPCells
[XX
]; x
++)
820 for (int y
= 0; y
< ddRankSetup
.numPPCells
[YY
]; y
++)
822 for (int z
= 0; z
< ddRankSetup
.numPPCells
[ZZ
]; z
++)
824 if (cartSetup
.bCartesianPP_PME
)
826 ivec coord
= { x
, y
, z
};
828 dd_cart_coord2pmecoord(ddRankSetup
, cartSetup
, coord
, coord_pme
);
829 if (cr
->dd
->ci
[XX
] == coord_pme
[XX
] && cr
->dd
->ci
[YY
] == coord_pme
[YY
]
830 && cr
->dd
->ci
[ZZ
] == coord_pme
[ZZ
])
832 ddranks
.push_back(ddcoord2simnodeid(cr
, x
, y
, z
));
837 /* The slab corresponds to the nodeid in the PME group */
838 if (gmx_ddcoord2pmeindex(cr
, x
, y
, z
) == pmenodeid
)
840 ddranks
.push_back(ddcoord2simnodeid(cr
, x
, y
, z
));
849 static gmx_bool
receive_vir_ener(const gmx_domdec_t
* dd
, gmx::ArrayRef
<const int> pmeRanks
, const t_commrec
* cr
)
851 gmx_bool bReceive
= TRUE
;
853 const DDRankSetup
& ddRankSetup
= dd
->comm
->ddRankSetup
;
854 if (ddRankSetup
.usePmeOnlyRanks
)
856 const CartesianRankSetup
& cartSetup
= dd
->comm
->cartesianRankSetup
;
857 if (cartSetup
.bCartesianPP_PME
)
860 int pmenode
= dd_simnode2pmenode(ddRankSetup
, cartSetup
, pmeRanks
, cr
, cr
->sim_nodeid
);
862 MPI_Cart_coords(cr
->mpi_comm_mysim
, cr
->sim_nodeid
, DIM
, coords
);
863 coords
[cartSetup
.cartpmedim
]++;
864 if (coords
[cartSetup
.cartpmedim
] < dd
->numCells
[cartSetup
.cartpmedim
])
867 MPI_Cart_rank(cr
->mpi_comm_mysim
, coords
, &rank
);
868 if (dd_simnode2pmenode(ddRankSetup
, cartSetup
, pmeRanks
, cr
, rank
) == pmenode
)
870 /* This is not the last PP node for pmenode */
877 "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
882 int pmenode
= dd_simnode2pmenode(ddRankSetup
, cartSetup
, pmeRanks
, cr
, cr
->sim_nodeid
);
883 if (cr
->sim_nodeid
+ 1 < cr
->nnodes
884 && dd_simnode2pmenode(ddRankSetup
, cartSetup
, pmeRanks
, cr
, cr
->sim_nodeid
+ 1) == pmenode
)
886 /* This is not the last PP node for pmenode */
895 static void set_slb_pme_dim_f(gmx_domdec_t
* dd
, int dim
, real
** dim_f
)
897 gmx_domdec_comm_t
* comm
;
902 snew(*dim_f
, dd
->numCells
[dim
] + 1);
904 for (i
= 1; i
< dd
->numCells
[dim
]; i
++)
906 if (comm
->slb_frac
[dim
])
908 (*dim_f
)[i
] = (*dim_f
)[i
- 1] + comm
->slb_frac
[dim
][i
- 1];
912 (*dim_f
)[i
] = static_cast<real
>(i
) / static_cast<real
>(dd
->numCells
[dim
]);
915 (*dim_f
)[dd
->numCells
[dim
]] = 1;
918 static void init_ddpme(gmx_domdec_t
* dd
, gmx_ddpme_t
* ddpme
, int dimind
)
920 const DDRankSetup
& ddRankSetup
= dd
->comm
->ddRankSetup
;
922 if (dimind
== 0 && dd
->dim
[0] == YY
&& ddRankSetup
.npmenodes_x
== 1)
930 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
932 ddpme
->nslab
= (ddpme
->dim
== 0 ? ddRankSetup
.npmenodes_x
: ddRankSetup
.npmenodes_y
);
934 if (ddpme
->nslab
<= 1)
939 const int nso
= ddRankSetup
.numRanksDoingPme
/ ddpme
->nslab
;
940 /* Determine for each PME slab the PP location range for dimension dim */
941 snew(ddpme
->pp_min
, ddpme
->nslab
);
942 snew(ddpme
->pp_max
, ddpme
->nslab
);
943 for (int slab
= 0; slab
< ddpme
->nslab
; slab
++)
945 ddpme
->pp_min
[slab
] = dd
->numCells
[dd
->dim
[dimind
]] - 1;
946 ddpme
->pp_max
[slab
] = 0;
948 for (int i
= 0; i
< dd
->nnodes
; i
++)
951 ddindex2xyz(dd
->numCells
, i
, xyz
);
952 /* For y only use our y/z slab.
953 * This assumes that the PME x grid size matches the DD grid size.
955 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
])
957 const int pmeindex
= ddindex2pmeindex(ddRankSetup
, i
);
961 slab
= pmeindex
/ nso
;
965 slab
= pmeindex
% ddpme
->nslab
;
967 ddpme
->pp_min
[slab
] = std::min(ddpme
->pp_min
[slab
], xyz
[dimind
]);
968 ddpme
->pp_max
[slab
] = std::max(ddpme
->pp_max
[slab
], xyz
[dimind
]);
972 set_slb_pme_dim_f(dd
, ddpme
->dim
, &ddpme
->slb_dim_f
);
975 int dd_pme_maxshift_x(const gmx_domdec_t
* dd
)
977 const DDRankSetup
& ddRankSetup
= dd
->comm
->ddRankSetup
;
979 if (ddRankSetup
.ddpme
[0].dim
== XX
)
981 return ddRankSetup
.ddpme
[0].maxshift
;
989 int dd_pme_maxshift_y(const gmx_domdec_t
* dd
)
991 const DDRankSetup
& ddRankSetup
= dd
->comm
->ddRankSetup
;
993 if (ddRankSetup
.ddpme
[0].dim
== YY
)
995 return ddRankSetup
.ddpme
[0].maxshift
;
997 else if (ddRankSetup
.npmedecompdim
>= 2 && ddRankSetup
.ddpme
[1].dim
== YY
)
999 return ddRankSetup
.ddpme
[1].maxshift
;
1007 bool ddHaveSplitConstraints(const gmx_domdec_t
& dd
)
1009 return dd
.comm
->systemInfo
.haveSplitConstraints
;
1012 bool ddUsesUpdateGroups(const gmx_domdec_t
& dd
)
1014 return dd
.comm
->systemInfo
.useUpdateGroups
;
1017 void dd_cycles_add(const gmx_domdec_t
* dd
, float cycles
, int ddCycl
)
1019 /* Note that the cycles value can be incorrect, either 0 or some
1020 * extremely large value, when our thread migrated to another core
1021 * with an unsynchronized cycle counter. If this happens less often
1022 * that once per nstlist steps, this will not cause issues, since
1023 * we later subtract the maximum value from the sum over nstlist steps.
1024 * A zero count will slightly lower the total, but that's a small effect.
1025 * Note that the main purpose of the subtraction of the maximum value
1026 * is to avoid throwing off the load balancing when stalls occur due
1027 * e.g. system activity or network congestion.
1029 dd
->comm
->cycl
[ddCycl
] += cycles
;
1030 dd
->comm
->cycl_n
[ddCycl
]++;
1031 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
1033 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
1038 static void make_load_communicator(gmx_domdec_t
* dd
, int dim_ind
, ivec loc
)
1043 gmx_bool bPartOfGroup
= FALSE
;
1045 dim
= dd
->dim
[dim_ind
];
1046 copy_ivec(loc
, loc_c
);
1047 for (i
= 0; i
< dd
->numCells
[dim
]; i
++)
1050 rank
= dd_index(dd
->numCells
, loc_c
);
1051 if (rank
== dd
->rank
)
1053 /* This process is part of the group */
1054 bPartOfGroup
= TRUE
;
1057 MPI_Comm_split(dd
->mpi_comm_all
, bPartOfGroup
? 0 : MPI_UNDEFINED
, dd
->rank
, &c_row
);
1060 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
1061 if (!isDlbDisabled(dd
->comm
))
1063 DDCellsizesWithDlb
& cellsizes
= dd
->comm
->cellsizesWithDlb
[dim_ind
];
1065 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
1067 /* This is the root process of this row */
1068 cellsizes
.rowMaster
= std::make_unique
<RowMaster
>();
1070 RowMaster
& rowMaster
= *cellsizes
.rowMaster
;
1071 rowMaster
.cellFrac
.resize(ddCellFractionBufferSize(dd
, dim_ind
));
1072 rowMaster
.oldCellFrac
.resize(dd
->numCells
[dim
] + 1);
1073 rowMaster
.isCellMin
.resize(dd
->numCells
[dim
]);
1076 rowMaster
.bounds
.resize(dd
->numCells
[dim
]);
1078 rowMaster
.buf_ncd
.resize(dd
->numCells
[dim
]);
1082 /* This is not a root process, we only need to receive cell_f */
1083 cellsizes
.fracRow
.resize(ddCellFractionBufferSize(dd
, dim_ind
));
1086 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
1088 snew(dd
->comm
->load
[dim_ind
].load
, dd
->numCells
[dim
] * DD_NLOAD_MAX
);
1094 void dd_setup_dlb_resource_sharing(const t_commrec
* cr
, int gpu_id
)
1097 int physicalnode_id_hash
;
1099 MPI_Comm mpi_comm_pp_physicalnode
;
1101 if (!thisRankHasDuty(cr
, DUTY_PP
) || gpu_id
< 0)
1103 /* Only ranks with short-ranged tasks (currently) use GPUs.
1104 * If we don't have GPUs assigned, there are no resources to share.
1109 physicalnode_id_hash
= gmx_physicalnode_id_hash();
1115 fprintf(debug
, "dd_setup_dd_dlb_gpu_sharing:\n");
1116 fprintf(debug
, "DD PP rank %d physical node hash %d gpu_id %d\n", dd
->rank
,
1117 physicalnode_id_hash
, gpu_id
);
1119 /* Split the PP communicator over the physical nodes */
1120 /* TODO: See if we should store this (before), as it's also used for
1121 * for the nodecomm summation.
1123 // TODO PhysicalNodeCommunicator could be extended/used to handle
1124 // the need for per-node per-group communicators.
1125 MPI_Comm_split(dd
->mpi_comm_all
, physicalnode_id_hash
, dd
->rank
, &mpi_comm_pp_physicalnode
);
1126 MPI_Comm_split(mpi_comm_pp_physicalnode
, gpu_id
, dd
->rank
, &dd
->comm
->mpi_comm_gpu_shared
);
1127 MPI_Comm_free(&mpi_comm_pp_physicalnode
);
1128 MPI_Comm_size(dd
->comm
->mpi_comm_gpu_shared
, &dd
->comm
->nrank_gpu_shared
);
1132 fprintf(debug
, "nrank_gpu_shared %d\n", dd
->comm
->nrank_gpu_shared
);
1135 /* Note that some ranks could share a GPU, while others don't */
1137 if (dd
->comm
->nrank_gpu_shared
== 1)
1139 MPI_Comm_free(&dd
->comm
->mpi_comm_gpu_shared
);
1142 GMX_UNUSED_VALUE(cr
);
1143 GMX_UNUSED_VALUE(gpu_id
);
1147 static void make_load_communicators(gmx_domdec_t gmx_unused
* dd
)
1150 int dim0
, dim1
, i
, j
;
1155 fprintf(debug
, "Making load communicators\n");
1158 dd
->comm
->load
= new domdec_load_t
[std::max(dd
->ndim
, 1)];
1159 snew(dd
->comm
->mpi_comm_load
, std::max(dd
->ndim
, 1));
1167 make_load_communicator(dd
, 0, loc
);
1171 for (i
= 0; i
< dd
->numCells
[dim0
]; i
++)
1174 make_load_communicator(dd
, 1, loc
);
1180 for (i
= 0; i
< dd
->numCells
[dim0
]; i
++)
1184 for (j
= 0; j
< dd
->numCells
[dim1
]; j
++)
1187 make_load_communicator(dd
, 2, loc
);
1194 fprintf(debug
, "Finished making load communicators\n");
1199 /*! \brief Sets up the relation between neighboring domains and zones */
1200 static void setup_neighbor_relations(gmx_domdec_t
* dd
)
1204 gmx_domdec_zones_t
* zones
;
1205 GMX_ASSERT((dd
->ndim
>= 0) && (dd
->ndim
<= DIM
), "Must have valid number of dimensions for DD");
1207 for (d
= 0; d
< dd
->ndim
; d
++)
1210 copy_ivec(dd
->ci
, tmp
);
1211 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->numCells
[dim
];
1212 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
, tmp
);
1213 copy_ivec(dd
->ci
, tmp
);
1214 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->numCells
[dim
]) % dd
->numCells
[dim
];
1215 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
, tmp
);
1218 fprintf(debug
, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd
->rank
, dim
,
1219 dd
->neighbor
[d
][0], dd
->neighbor
[d
][1]);
1223 int nzone
= (1 << dd
->ndim
);
1224 int nizone
= (1 << std::max(dd
->ndim
- 1, 0));
1225 assert(nizone
>= 1 && nizone
<= DD_MAXIZONE
);
1227 zones
= &dd
->comm
->zones
;
1229 for (int i
= 0; i
< nzone
; i
++)
1232 clear_ivec(zones
->shift
[i
]);
1233 for (d
= 0; d
< dd
->ndim
; d
++)
1235 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
1240 for (int i
= 0; i
< nzone
; i
++)
1242 for (d
= 0; d
< DIM
; d
++)
1244 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
1247 s
[d
] += dd
->numCells
[d
];
1249 else if (s
[d
] >= dd
->numCells
[d
])
1251 s
[d
] -= dd
->numCells
[d
];
1255 for (int iZoneIndex
= 0; iZoneIndex
< nizone
; iZoneIndex
++)
1258 ddNonbondedZonePairRanges
[iZoneIndex
][0] == iZoneIndex
,
1259 "The first element for each ddNonbondedZonePairRanges should match its index");
1261 DDPairInteractionRanges iZone
;
1262 iZone
.iZoneIndex
= iZoneIndex
;
1263 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1264 * j-zones up to nzone.
1266 iZone
.jZoneRange
= gmx::Range
<int>(std::min(ddNonbondedZonePairRanges
[iZoneIndex
][1], nzone
),
1267 std::min(ddNonbondedZonePairRanges
[iZoneIndex
][2], nzone
));
1268 for (dim
= 0; dim
< DIM
; dim
++)
1270 if (dd
->numCells
[dim
] == 1)
1272 /* All shifts should be allowed */
1273 iZone
.shift0
[dim
] = -1;
1274 iZone
.shift1
[dim
] = 1;
1278 /* Determine the min/max j-zone shift wrt the i-zone */
1279 iZone
.shift0
[dim
] = 1;
1280 iZone
.shift1
[dim
] = -1;
1281 for (int jZone
: iZone
.jZoneRange
)
1283 int shift_diff
= zones
->shift
[jZone
][dim
] - zones
->shift
[iZoneIndex
][dim
];
1284 if (shift_diff
< iZone
.shift0
[dim
])
1286 iZone
.shift0
[dim
] = shift_diff
;
1288 if (shift_diff
> iZone
.shift1
[dim
])
1290 iZone
.shift1
[dim
] = shift_diff
;
1296 zones
->iZones
.push_back(iZone
);
1299 if (!isDlbDisabled(dd
->comm
))
1301 dd
->comm
->cellsizesWithDlb
.resize(dd
->ndim
);
1304 if (dd
->comm
->ddSettings
.recordLoad
)
1306 make_load_communicators(dd
);
1310 static void make_pp_communicator(const gmx::MDLogger
& mdlog
,
1312 t_commrec gmx_unused
* cr
,
1313 bool gmx_unused reorder
)
1316 gmx_domdec_comm_t
* comm
= dd
->comm
;
1317 CartesianRankSetup
& cartSetup
= comm
->cartesianRankSetup
;
1319 if (cartSetup
.bCartesianPP
)
1321 /* Set up cartesian communication for the particle-particle part */
1323 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1324 dd
->numCells
[XX
], dd
->numCells
[YY
], dd
->numCells
[ZZ
]);
1327 for (int i
= 0; i
< DIM
; i
++)
1332 MPI_Cart_create(cr
->mpi_comm_mygroup
, DIM
, dd
->numCells
, periods
, static_cast<int>(reorder
),
1334 /* We overwrite the old communicator with the new cartesian one */
1335 cr
->mpi_comm_mygroup
= comm_cart
;
1338 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
1339 MPI_Comm_rank(dd
->mpi_comm_all
, &dd
->rank
);
1341 if (cartSetup
.bCartesianPP_PME
)
1343 /* Since we want to use the original cartesian setup for sim,
1344 * and not the one after split, we need to make an index.
1346 cartSetup
.ddindex2ddnodeid
.resize(dd
->nnodes
);
1347 cartSetup
.ddindex2ddnodeid
[dd_index(dd
->numCells
, dd
->ci
)] = dd
->rank
;
1348 gmx_sumi(dd
->nnodes
, cartSetup
.ddindex2ddnodeid
.data(), cr
);
1349 /* Get the rank of the DD master,
1350 * above we made sure that the master node is a PP node.
1361 MPI_Allreduce(&rank
, &dd
->masterrank
, 1, MPI_INT
, MPI_SUM
, dd
->mpi_comm_all
);
1363 else if (cartSetup
.bCartesianPP
)
1365 if (!comm
->ddRankSetup
.usePmeOnlyRanks
)
1367 /* The PP communicator is also
1368 * the communicator for this simulation
1370 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
1372 cr
->nodeid
= dd
->rank
;
1374 MPI_Cart_coords(dd
->mpi_comm_all
, dd
->rank
, DIM
, dd
->ci
);
1376 /* We need to make an index to go from the coordinates
1377 * to the nodeid of this simulation.
1379 cartSetup
.ddindex2simnodeid
.resize(dd
->nnodes
);
1380 std::vector
<int> buf(dd
->nnodes
);
1381 if (thisRankHasDuty(cr
, DUTY_PP
))
1383 buf
[dd_index(dd
->numCells
, dd
->ci
)] = cr
->sim_nodeid
;
1385 /* Communicate the ddindex to simulation nodeid index */
1386 MPI_Allreduce(buf
.data(), cartSetup
.ddindex2simnodeid
.data(), dd
->nnodes
, MPI_INT
, MPI_SUM
,
1387 cr
->mpi_comm_mysim
);
1389 /* Determine the master coordinates and rank.
1390 * The DD master should be the same node as the master of this sim.
1392 for (int i
= 0; i
< dd
->nnodes
; i
++)
1394 if (cartSetup
.ddindex2simnodeid
[i
] == 0)
1396 ddindex2xyz(dd
->numCells
, i
, dd
->master_ci
);
1397 MPI_Cart_rank(dd
->mpi_comm_all
, dd
->master_ci
, &dd
->masterrank
);
1402 fprintf(debug
, "The master rank is %d\n", dd
->masterrank
);
1407 /* No Cartesian communicators */
1408 /* We use the rank in dd->comm->all as DD index */
1409 ddindex2xyz(dd
->numCells
, dd
->rank
, dd
->ci
);
1410 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1412 clear_ivec(dd
->master_ci
);
1417 .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd
->rank
,
1418 dd
->ci
[XX
], dd
->ci
[YY
], dd
->ci
[ZZ
]);
1421 fprintf(debug
, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd
->rank
,
1422 dd
->ci
[XX
], dd
->ci
[YY
], dd
->ci
[ZZ
]);
1426 static void receive_ddindex2simnodeid(gmx_domdec_t
* dd
, t_commrec
* cr
)
1429 CartesianRankSetup
& cartSetup
= dd
->comm
->cartesianRankSetup
;
1431 if (!cartSetup
.bCartesianPP_PME
&& cartSetup
.bCartesianPP
)
1433 cartSetup
.ddindex2simnodeid
.resize(dd
->nnodes
);
1434 std::vector
<int> buf(dd
->nnodes
);
1435 if (thisRankHasDuty(cr
, DUTY_PP
))
1437 buf
[dd_index(dd
->numCells
, dd
->ci
)] = cr
->sim_nodeid
;
1439 /* Communicate the ddindex to simulation nodeid index */
1440 MPI_Allreduce(buf
.data(), cartSetup
.ddindex2simnodeid
.data(), dd
->nnodes
, MPI_INT
, MPI_SUM
,
1441 cr
->mpi_comm_mysim
);
1444 GMX_UNUSED_VALUE(dd
);
1445 GMX_UNUSED_VALUE(cr
);
1449 static CartesianRankSetup
split_communicator(const gmx::MDLogger
& mdlog
,
1451 const DdRankOrder ddRankOrder
,
1452 bool gmx_unused reorder
,
1453 const DDRankSetup
& ddRankSetup
,
1455 std::vector
<int>* pmeRanks
)
1457 CartesianRankSetup cartSetup
;
1459 cartSetup
.bCartesianPP
= (ddRankOrder
== DdRankOrder::cartesian
);
1460 cartSetup
.bCartesianPP_PME
= false;
1462 const ivec
& numDDCells
= ddRankSetup
.numPPCells
;
1463 /* Initially we set ntot to the number of PP cells */
1464 copy_ivec(numDDCells
, cartSetup
.ntot
);
1466 if (cartSetup
.bCartesianPP
)
1468 const int numDDCellsTot
= ddRankSetup
.numPPRanks
;
1470 for (int i
= 1; i
< DIM
; i
++)
1472 bDiv
[i
] = ((ddRankSetup
.numRanksDoingPme
* numDDCells
[i
]) % numDDCellsTot
== 0);
1474 if (bDiv
[YY
] || bDiv
[ZZ
])
1476 cartSetup
.bCartesianPP_PME
= TRUE
;
1477 /* If we have 2D PME decomposition, which is always in x+y,
1478 * we stack the PME only nodes in z.
1479 * Otherwise we choose the direction that provides the thinnest slab
1480 * of PME only nodes as this will have the least effect
1481 * on the PP communication.
1482 * But for the PME communication the opposite might be better.
1484 if (bDiv
[ZZ
] && (ddRankSetup
.npmenodes_y
> 1 || !bDiv
[YY
] || numDDCells
[YY
] > numDDCells
[ZZ
]))
1486 cartSetup
.cartpmedim
= ZZ
;
1490 cartSetup
.cartpmedim
= YY
;
1492 cartSetup
.ntot
[cartSetup
.cartpmedim
] +=
1493 (ddRankSetup
.numRanksDoingPme
* numDDCells
[cartSetup
.cartpmedim
]) / numDDCellsTot
;
1498 .appendTextFormatted(
1499 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1501 ddRankSetup
.numRanksDoingPme
, numDDCells
[XX
], numDDCells
[YY
],
1502 numDDCells
[XX
], numDDCells
[ZZ
]);
1504 .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1508 if (cartSetup
.bCartesianPP_PME
)
1515 .appendTextFormatted(
1516 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1517 cartSetup
.ntot
[XX
], cartSetup
.ntot
[YY
], cartSetup
.ntot
[ZZ
]);
1519 for (int i
= 0; i
< DIM
; i
++)
1524 MPI_Cart_create(cr
->mpi_comm_mysim
, DIM
, cartSetup
.ntot
, periods
, static_cast<int>(reorder
),
1526 MPI_Comm_rank(comm_cart
, &rank
);
1527 if (MASTER(cr
) && rank
!= 0)
1529 gmx_fatal(FARGS
, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1532 /* With this assigment we loose the link to the original communicator
1533 * which will usually be MPI_COMM_WORLD, unless have multisim.
1535 cr
->mpi_comm_mysim
= comm_cart
;
1536 cr
->sim_nodeid
= rank
;
1538 MPI_Cart_coords(cr
->mpi_comm_mysim
, cr
->sim_nodeid
, DIM
, ddCellIndex
);
1541 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr
->sim_nodeid
,
1542 ddCellIndex
[XX
], ddCellIndex
[YY
], ddCellIndex
[ZZ
]);
1544 if (ddCellIndex
[cartSetup
.cartpmedim
] < numDDCells
[cartSetup
.cartpmedim
])
1548 if (!ddRankSetup
.usePmeOnlyRanks
1549 || ddCellIndex
[cartSetup
.cartpmedim
] >= numDDCells
[cartSetup
.cartpmedim
])
1551 cr
->duty
= DUTY_PME
;
1554 /* Split the sim communicator into PP and PME only nodes */
1555 MPI_Comm_split(cr
->mpi_comm_mysim
, getThisRankDuties(cr
),
1556 dd_index(cartSetup
.ntot
, ddCellIndex
), &cr
->mpi_comm_mygroup
);
1558 GMX_UNUSED_VALUE(ddCellIndex
);
1563 switch (ddRankOrder
)
1565 case DdRankOrder::pp_pme
:
1566 GMX_LOG(mdlog
.info
).appendText("Order of the ranks: PP first, PME last");
1568 case DdRankOrder::interleave
:
1569 /* Interleave the PP-only and PME-only ranks */
1570 GMX_LOG(mdlog
.info
).appendText("Interleaving PP and PME ranks");
1571 *pmeRanks
= dd_interleaved_pme_ranks(ddRankSetup
);
1573 case DdRankOrder::cartesian
: break;
1574 default: gmx_fatal(FARGS
, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder
));
1577 if (dd_simnode2pmenode(ddRankSetup
, cartSetup
, *pmeRanks
, cr
, cr
->sim_nodeid
) == -1)
1579 cr
->duty
= DUTY_PME
;
1586 /* Split the sim communicator into PP and PME only nodes */
1587 MPI_Comm_split(cr
->mpi_comm_mysim
, getThisRankDuties(cr
), cr
->nodeid
, &cr
->mpi_comm_mygroup
);
1588 MPI_Comm_rank(cr
->mpi_comm_mygroup
, &cr
->nodeid
);
1593 .appendTextFormatted("This rank does only %s work.\n",
1594 thisRankHasDuty(cr
, DUTY_PP
) ? "particle-particle" : "PME-mesh");
1599 /*! \brief Makes the PP communicator and the PME communicator, when needed
1601 * Returns the Cartesian rank setup.
1602 * Sets \p cr->mpi_comm_mygroup
1603 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1604 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1606 static CartesianRankSetup
makeGroupCommunicators(const gmx::MDLogger
& mdlog
,
1607 const DDSettings
& ddSettings
,
1608 const DdRankOrder ddRankOrder
,
1609 const DDRankSetup
& ddRankSetup
,
1612 std::vector
<int>* pmeRanks
)
1614 CartesianRankSetup cartSetup
;
1616 // As a default, both group and sim communicators are equal to the default communicator
1617 cr
->mpi_comm_mygroup
= cr
->mpiDefaultCommunicator
;
1618 cr
->mpi_comm_mysim
= cr
->mpiDefaultCommunicator
;
1619 cr
->nnodes
= cr
->sizeOfDefaultCommunicator
;
1620 cr
->nodeid
= cr
->rankInDefaultCommunicator
;
1621 cr
->sim_nodeid
= cr
->rankInDefaultCommunicator
;
1623 if (ddRankSetup
.usePmeOnlyRanks
)
1625 /* Split the communicator into a PP and PME part */
1626 cartSetup
= split_communicator(mdlog
, cr
, ddRankOrder
, ddSettings
.useCartesianReorder
,
1627 ddRankSetup
, ddCellIndex
, pmeRanks
);
1631 /* All nodes do PP and PME */
1632 /* We do not require separate communicators */
1633 cartSetup
.bCartesianPP
= false;
1634 cartSetup
.bCartesianPP_PME
= false;
1640 /*! \brief For PP ranks, sets or makes the communicator
1642 * For PME ranks get the rank id.
1643 * For PP only ranks, sets the PME-only rank.
1645 static void setupGroupCommunication(const gmx::MDLogger
& mdlog
,
1646 const DDSettings
& ddSettings
,
1647 gmx::ArrayRef
<const int> pmeRanks
,
1649 const int numAtomsInSystem
,
1652 const DDRankSetup
& ddRankSetup
= dd
->comm
->ddRankSetup
;
1653 const CartesianRankSetup
& cartSetup
= dd
->comm
->cartesianRankSetup
;
1655 if (thisRankHasDuty(cr
, DUTY_PP
))
1657 /* Copy or make a new PP communicator */
1659 /* We (possibly) reordered the nodes in split_communicator,
1660 * so it is no longer required in make_pp_communicator.
1662 const bool useCartesianReorder
= (ddSettings
.useCartesianReorder
&& !cartSetup
.bCartesianPP_PME
);
1664 make_pp_communicator(mdlog
, dd
, cr
, useCartesianReorder
);
1668 receive_ddindex2simnodeid(dd
, cr
);
1671 if (!thisRankHasDuty(cr
, DUTY_PME
))
1673 /* Set up the commnuication to our PME node */
1674 dd
->pme_nodeid
= dd_simnode2pmenode(ddRankSetup
, cartSetup
, pmeRanks
, cr
, cr
->sim_nodeid
);
1675 dd
->pme_receive_vir_ener
= receive_vir_ener(dd
, pmeRanks
, cr
);
1678 fprintf(debug
, "My pme_nodeid %d receive ener %s\n", dd
->pme_nodeid
,
1679 gmx::boolToString(dd
->pme_receive_vir_ener
));
1684 dd
->pme_nodeid
= -1;
1687 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1690 dd
->ma
= std::make_unique
<AtomDistribution
>(dd
->numCells
, numAtomsInSystem
, numAtomsInSystem
);
1694 static real
* get_slb_frac(const gmx::MDLogger
& mdlog
, const char* dir
, int nc
, const char* size_string
)
1696 real
* slb_frac
, tot
;
1701 if (nc
> 1 && size_string
!= nullptr)
1703 GMX_LOG(mdlog
.info
).appendTextFormatted("Using static load balancing for the %s direction", dir
);
1706 for (i
= 0; i
< nc
; i
++)
1709 sscanf(size_string
, "%20lf%n", &dbl
, &n
);
1713 "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1721 std::string relativeCellSizes
= "Relative cell sizes:";
1722 for (i
= 0; i
< nc
; i
++)
1725 relativeCellSizes
+= gmx::formatString(" %5.3f", slb_frac
[i
]);
1727 GMX_LOG(mdlog
.info
).appendText(relativeCellSizes
);
1733 static int multi_body_bondeds_count(const gmx_mtop_t
* mtop
)
1736 gmx_mtop_ilistloop_t iloop
= gmx_mtop_ilistloop_init(mtop
);
1738 while (const InteractionLists
* ilists
= gmx_mtop_ilistloop_next(iloop
, &nmol
))
1740 for (auto& ilist
: extractILists(*ilists
, IF_BOND
))
1742 if (NRAL(ilist
.functionType
) > 2)
1744 n
+= nmol
* (ilist
.iatoms
.size() / ilistStride(ilist
));
1752 static int dd_getenv(const gmx::MDLogger
& mdlog
, const char* env_var
, int def
)
1758 val
= getenv(env_var
);
1761 if (sscanf(val
, "%20d", &nst
) <= 0)
1765 GMX_LOG(mdlog
.info
).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var
, val
, nst
);
1771 static void check_dd_restrictions(const gmx_domdec_t
* dd
, const t_inputrec
* ir
, const gmx::MDLogger
& mdlog
)
1773 if (ir
->pbcType
== PbcType::Screw
1774 && (dd
->numCells
[XX
] == 1 || dd
->numCells
[YY
] > 1 || dd
->numCells
[ZZ
] > 1))
1776 gmx_fatal(FARGS
, "With pbc=%s can only do domain decomposition in the x-direction",
1777 c_pbcTypeNames
[ir
->pbcType
].c_str());
1780 if (ir
->nstlist
== 0)
1782 gmx_fatal(FARGS
, "Domain decomposition does not work with nstlist=0");
1785 if (ir
->comm_mode
== ecmANGULAR
&& ir
->pbcType
!= PbcType::No
)
1787 GMX_LOG(mdlog
.warning
)
1789 "comm-mode angular will give incorrect results when the comm group "
1790 "partially crosses a periodic boundary");
1794 static real
average_cellsize_min(const gmx_ddbox_t
& ddbox
, const ivec numDomains
)
1796 real r
= ddbox
.box_size
[XX
];
1797 for (int d
= 0; d
< DIM
; d
++)
1799 if (numDomains
[d
] > 1)
1801 /* Check using the initial average cell size */
1802 r
= std::min(r
, ddbox
.box_size
[d
] * ddbox
.skew_fac
[d
] / numDomains
[d
]);
1809 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1811 static DlbState
forceDlbOffOrBail(DlbState cmdlineDlbState
,
1812 const std::string
& reasonStr
,
1813 const gmx::MDLogger
& mdlog
)
1815 std::string dlbNotSupportedErr
= "Dynamic load balancing requested, but ";
1816 std::string dlbDisableNote
= "NOTE: disabling dynamic load balancing as ";
1818 if (cmdlineDlbState
== DlbState::onUser
)
1820 gmx_fatal(FARGS
, "%s", (dlbNotSupportedErr
+ reasonStr
).c_str());
1822 else if (cmdlineDlbState
== DlbState::offCanTurnOn
)
1824 GMX_LOG(mdlog
.info
).appendText(dlbDisableNote
+ reasonStr
);
1826 return DlbState::offForever
;
1829 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1831 * This function parses the parameters of "-dlb" command line option setting
1832 * corresponding state values. Then it checks the consistency of the determined
1833 * state with other run parameters and settings. As a result, the initial state
1834 * may be altered or an error may be thrown if incompatibility of options is detected.
1836 * \param [in] mdlog Logger.
1837 * \param [in] dlbOption Enum value for the DLB option.
1838 * \param [in] bRecordLoad True if the load balancer is recording load information.
1839 * \param [in] mdrunOptions Options for mdrun.
1840 * \param [in] ir Pointer mdrun to input parameters.
1841 * \returns DLB initial/startup state.
1843 static DlbState
determineInitialDlbState(const gmx::MDLogger
& mdlog
,
1844 DlbOption dlbOption
,
1845 gmx_bool bRecordLoad
,
1846 const gmx::MdrunOptions
& mdrunOptions
,
1847 const t_inputrec
* ir
)
1849 DlbState dlbState
= DlbState::offCanTurnOn
;
1853 case DlbOption::turnOnWhenUseful
: dlbState
= DlbState::offCanTurnOn
; break;
1854 case DlbOption::no
: dlbState
= DlbState::offUser
; break;
1855 case DlbOption::yes
: dlbState
= DlbState::onUser
; break;
1856 default: gmx_incons("Invalid dlbOption enum value");
1859 /* Reruns don't support DLB: bail or override auto mode */
1860 if (mdrunOptions
.rerun
)
1862 std::string reasonStr
= "it is not supported in reruns.";
1863 return forceDlbOffOrBail(dlbState
, reasonStr
, mdlog
);
1866 /* Unsupported integrators */
1867 if (!EI_DYNAMICS(ir
->eI
))
1869 auto reasonStr
= gmx::formatString(
1870 "it is only supported with dynamics, not with integrator '%s'.", EI(ir
->eI
));
1871 return forceDlbOffOrBail(dlbState
, reasonStr
, mdlog
);
1874 /* Without cycle counters we can't time work to balance on */
1877 std::string reasonStr
=
1878 "cycle counters unsupported or not enabled in the operating system kernel.";
1879 return forceDlbOffOrBail(dlbState
, reasonStr
, mdlog
);
1882 if (mdrunOptions
.reproducible
)
1884 std::string reasonStr
= "you started a reproducible run.";
1887 case DlbState::offUser
: break;
1888 case DlbState::offForever
:
1889 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1891 case DlbState::offCanTurnOn
: return forceDlbOffOrBail(dlbState
, reasonStr
, mdlog
);
1892 case DlbState::onCanTurnOff
:
1893 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1895 case DlbState::onUser
:
1896 return forceDlbOffOrBail(
1899 + " In load balanced runs binary reproducibility cannot be "
1903 gmx_fatal(FARGS
, "Death horror: undefined case (%d) for load balancing choice",
1904 static_cast<int>(dlbState
));
1911 static gmx_domdec_comm_t
* init_dd_comm()
1913 gmx_domdec_comm_t
* comm
= new gmx_domdec_comm_t
;
1915 comm
->n_load_have
= 0;
1916 comm
->n_load_collect
= 0;
1918 comm
->haveTurnedOffDlb
= false;
1920 for (int i
= 0; i
< static_cast<int>(DDAtomRanges::Type::Number
); i
++)
1922 comm
->sum_nat
[i
] = 0;
1926 comm
->load_step
= 0;
1929 clear_ivec(comm
->load_lim
);
1933 /* This should be replaced by a unique pointer */
1934 comm
->balanceRegion
= ddBalanceRegionAllocate();
1939 /* Returns whether mtop contains constraints and/or vsites */
1940 static bool systemHasConstraintsOrVsites(const gmx_mtop_t
& mtop
)
1942 auto ilistLoop
= gmx_mtop_ilistloop_init(mtop
);
1944 while (const InteractionLists
* ilists
= gmx_mtop_ilistloop_next(ilistLoop
, &nmol
))
1946 if (!extractILists(*ilists
, IF_CONSTRAINT
| IF_VSITE
).empty())
1955 static void setupUpdateGroups(const gmx::MDLogger
& mdlog
,
1956 const gmx_mtop_t
& mtop
,
1957 const t_inputrec
& inputrec
,
1958 const real cutoffMargin
,
1959 DDSystemInfo
* systemInfo
)
1961 /* When we have constraints and/or vsites, it is beneficial to use
1962 * update groups (when possible) to allow independent update of groups.
1964 if (!systemHasConstraintsOrVsites(mtop
))
1966 /* No constraints or vsites, atoms can be updated independently */
1970 systemInfo
->updateGroupingPerMoleculetype
= gmx::makeUpdateGroups(mtop
);
1971 systemInfo
->useUpdateGroups
= (!systemInfo
->updateGroupingPerMoleculetype
.empty()
1972 && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1974 if (systemInfo
->useUpdateGroups
)
1976 int numUpdateGroups
= 0;
1977 for (const auto& molblock
: mtop
.molblock
)
1979 numUpdateGroups
+= molblock
.nmol
1980 * systemInfo
->updateGroupingPerMoleculetype
[molblock
.type
].numBlocks();
1983 systemInfo
->maxUpdateGroupRadius
= computeMaxUpdateGroupRadius(
1984 mtop
, systemInfo
->updateGroupingPerMoleculetype
, maxReferenceTemperature(inputrec
));
1986 /* To use update groups, the large domain-to-domain cutoff distance
1987 * should be compatible with the box size.
1989 systemInfo
->useUpdateGroups
= (atomToAtomIntoDomainToDomainCutoff(*systemInfo
, 0) < cutoffMargin
);
1991 if (systemInfo
->useUpdateGroups
)
1994 .appendTextFormatted(
1995 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1997 numUpdateGroups
, mtop
.natoms
/ static_cast<double>(numUpdateGroups
),
1998 systemInfo
->maxUpdateGroupRadius
);
2003 .appendTextFormatted(
2004 "The combination of rlist and box size prohibits the use of update "
2006 systemInfo
->updateGroupingPerMoleculetype
.clear();
2011 UnitCellInfo::UnitCellInfo(const t_inputrec
& ir
) :
2012 npbcdim(numPbcDimensions(ir
.pbcType
)),
2013 numBoundedDimensions(inputrec2nboundeddim(&ir
)),
2014 ddBoxIsDynamic(numBoundedDimensions
< DIM
|| inputrecDynamicBox(&ir
)),
2015 haveScrewPBC(ir
.pbcType
== PbcType::Screw
)
2019 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2020 static bool moleculesAreAlwaysWhole(const gmx_mtop_t
& mtop
,
2021 const bool useUpdateGroups
,
2022 gmx::ArrayRef
<const gmx::RangePartitioning
> updateGroupingPerMoleculetype
)
2024 if (useUpdateGroups
)
2026 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype
.size() == mtop
.moltype
.size(),
2027 "Need one grouping per moltype");
2028 for (size_t mol
= 0; mol
< mtop
.moltype
.size(); mol
++)
2030 if (updateGroupingPerMoleculetype
[mol
].numBlocks() > 1)
2038 for (const auto& moltype
: mtop
.moltype
)
2040 if (moltype
.atoms
.nr
> 1)
2050 /*! \brief Generate the simulation system information */
2051 static DDSystemInfo
getSystemInfo(const gmx::MDLogger
& mdlog
,
2053 MPI_Comm communicator
,
2054 const DomdecOptions
& options
,
2055 const gmx_mtop_t
& mtop
,
2056 const t_inputrec
& ir
,
2058 gmx::ArrayRef
<const gmx::RVec
> xGlobal
)
2060 const real tenPercentMargin
= 1.1;
2062 DDSystemInfo systemInfo
;
2064 /* We need to decide on update groups early, as this affects communication distances */
2065 systemInfo
.useUpdateGroups
= false;
2066 if (ir
.cutoff_scheme
== ecutsVERLET
)
2068 real cutoffMargin
= std::sqrt(max_cutoff2(ir
.pbcType
, box
)) - ir
.rlist
;
2069 setupUpdateGroups(mdlog
, mtop
, ir
, cutoffMargin
, &systemInfo
);
2072 systemInfo
.moleculesAreAlwaysWhole
= moleculesAreAlwaysWhole(
2073 mtop
, systemInfo
.useUpdateGroups
, systemInfo
.updateGroupingPerMoleculetype
);
2074 systemInfo
.haveInterDomainBondeds
=
2075 (!systemInfo
.moleculesAreAlwaysWhole
|| mtop
.bIntermolecularInteractions
);
2076 systemInfo
.haveInterDomainMultiBodyBondeds
=
2077 (systemInfo
.haveInterDomainBondeds
&& multi_body_bondeds_count(&mtop
) > 0);
2079 if (systemInfo
.useUpdateGroups
)
2081 systemInfo
.haveSplitConstraints
= false;
2082 systemInfo
.haveSplitSettles
= false;
2086 systemInfo
.haveSplitConstraints
= (gmx_mtop_ftype_count(mtop
, F_CONSTR
) > 0
2087 || gmx_mtop_ftype_count(mtop
, F_CONSTRNC
) > 0);
2088 systemInfo
.haveSplitSettles
= (gmx_mtop_ftype_count(mtop
, F_SETTLE
) > 0);
2093 /* Set the cut-off to some very large value,
2094 * so we don't need if statements everywhere in the code.
2095 * We use sqrt, since the cut-off is squared in some places.
2097 systemInfo
.cutoff
= GMX_CUTOFF_INF
;
2101 systemInfo
.cutoff
= atomToAtomIntoDomainToDomainCutoff(systemInfo
, ir
.rlist
);
2103 systemInfo
.minCutoffForMultiBody
= 0;
2105 /* Determine the minimum cell size limit, affected by many factors */
2106 systemInfo
.cellsizeLimit
= 0;
2107 systemInfo
.filterBondedCommunication
= false;
2109 /* We do not allow home atoms to move beyond the neighboring domain
2110 * between domain decomposition steps, which limits the cell size.
2111 * Get an estimate of cell size limit due to atom displacement.
2112 * In most cases this is a large overestimate, because it assumes
2113 * non-interaction atoms.
2114 * We set the chance to 1 in a trillion steps.
2116 constexpr real c_chanceThatAtomMovesBeyondDomain
= 1e-12;
2117 const real limitForAtomDisplacement
= minCellSizeForAtomDisplacement(
2118 mtop
, ir
, systemInfo
.updateGroupingPerMoleculetype
, c_chanceThatAtomMovesBeyondDomain
);
2119 GMX_LOG(mdlog
.info
).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement
);
2121 systemInfo
.cellsizeLimit
= std::max(systemInfo
.cellsizeLimit
, limitForAtomDisplacement
);
2123 /* TODO: PME decomposition currently requires atoms not to be more than
2124 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2125 * In nearly all cases, limitForAtomDisplacement will be smaller
2126 * than 2/3*rlist, so the PME requirement is satisfied.
2127 * But it would be better for both correctness and performance
2128 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2129 * Note that we would need to improve the pairlist buffer case.
2132 if (systemInfo
.haveInterDomainBondeds
)
2134 if (options
.minimumCommunicationRange
> 0)
2136 systemInfo
.minCutoffForMultiBody
=
2137 atomToAtomIntoDomainToDomainCutoff(systemInfo
, options
.minimumCommunicationRange
);
2138 if (options
.useBondedCommunication
)
2140 systemInfo
.filterBondedCommunication
=
2141 (systemInfo
.minCutoffForMultiBody
> systemInfo
.cutoff
);
2145 systemInfo
.cutoff
= std::max(systemInfo
.cutoff
, systemInfo
.minCutoffForMultiBody
);
2148 else if (ir
.bPeriodicMols
)
2150 /* Can not easily determine the required cut-off */
2151 GMX_LOG(mdlog
.warning
)
2153 "NOTE: Periodic molecules are present in this system. Because of this, "
2154 "the domain decomposition algorithm cannot easily determine the "
2155 "minimum cell size that it requires for treating bonded interactions. "
2156 "Instead, domain decomposition will assume that half the non-bonded "
2157 "cut-off will be a suitable lower bound.");
2158 systemInfo
.minCutoffForMultiBody
= systemInfo
.cutoff
/ 2;
2164 if (ddRole
== DDRole::Master
)
2166 dd_bonded_cg_distance(mdlog
, &mtop
, &ir
, xGlobal
, box
,
2167 options
.checkBondedInteractions
, &r_2b
, &r_mb
);
2169 gmx_bcast(sizeof(r_2b
), &r_2b
, communicator
);
2170 gmx_bcast(sizeof(r_mb
), &r_mb
, communicator
);
2172 /* We use an initial margin of 10% for the minimum cell size,
2173 * except when we are just below the non-bonded cut-off.
2175 if (options
.useBondedCommunication
)
2177 if (std::max(r_2b
, r_mb
) > systemInfo
.cutoff
)
2179 const real r_bonded
= std::max(r_2b
, r_mb
);
2180 systemInfo
.minCutoffForMultiBody
= tenPercentMargin
* r_bonded
;
2181 /* This is the (only) place where we turn on the filtering */
2182 systemInfo
.filterBondedCommunication
= true;
2186 const real r_bonded
= r_mb
;
2187 systemInfo
.minCutoffForMultiBody
=
2188 std::min(tenPercentMargin
* r_bonded
, systemInfo
.cutoff
);
2190 /* We determine cutoff_mbody later */
2191 systemInfo
.increaseMultiBodyCutoff
= true;
2195 /* No special bonded communication,
2196 * simply increase the DD cut-off.
2198 systemInfo
.minCutoffForMultiBody
= tenPercentMargin
* std::max(r_2b
, r_mb
);
2199 systemInfo
.cutoff
= std::max(systemInfo
.cutoff
, systemInfo
.minCutoffForMultiBody
);
2203 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2204 systemInfo
.minCutoffForMultiBody
);
2206 systemInfo
.cellsizeLimit
= std::max(systemInfo
.cellsizeLimit
, systemInfo
.minCutoffForMultiBody
);
2209 systemInfo
.constraintCommunicationRange
= 0;
2210 if (systemInfo
.haveSplitConstraints
&& options
.constraintCommunicationRange
<= 0)
2212 /* There is a cell size limit due to the constraints (P-LINCS) */
2213 systemInfo
.constraintCommunicationRange
= gmx::constr_r_max(mdlog
, &mtop
, &ir
);
2215 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2216 systemInfo
.constraintCommunicationRange
);
2217 if (systemInfo
.constraintCommunicationRange
> systemInfo
.cellsizeLimit
)
2221 "This distance will limit the DD cell size, you can override this with "
2225 else if (options
.constraintCommunicationRange
> 0)
2227 /* Here we do not check for dd->splitConstraints.
2228 * because one can also set a cell size limit for virtual sites only
2229 * and at this point we don't know yet if there are intercg v-sites.
2232 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2233 options
.constraintCommunicationRange
);
2234 systemInfo
.constraintCommunicationRange
= options
.constraintCommunicationRange
;
2236 systemInfo
.cellsizeLimit
= std::max(systemInfo
.cellsizeLimit
, systemInfo
.constraintCommunicationRange
);
2241 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2243 static void checkDDGridSetup(const DDGridSetup
& ddGridSetup
,
2245 MPI_Comm communicator
,
2247 const DomdecOptions
& options
,
2248 const DDSettings
& ddSettings
,
2249 const DDSystemInfo
& systemInfo
,
2250 const real cellsizeLimit
,
2251 const gmx_ddbox_t
& ddbox
)
2253 if (options
.numCells
[XX
] <= 0 && (ddGridSetup
.numDomains
[XX
] == 0))
2256 gmx_bool bC
= (systemInfo
.haveSplitConstraints
2257 && systemInfo
.constraintCommunicationRange
> systemInfo
.minCutoffForMultiBody
);
2258 sprintf(buf
, "Change the number of ranks or mdrun option %s%s%s", !bC
? "-rdd" : "-rcon",
2259 ddSettings
.initialDlbState
!= DlbState::offUser
? " or -dds" : "",
2260 bC
? " or your LINCS settings" : "");
2262 gmx_fatal_collective(FARGS
, communicator
, ddRole
== DDRole::Master
,
2263 "There is no domain decomposition for %d ranks that is compatible "
2264 "with the given box and a minimum cell size of %g nm\n"
2266 "Look in the log file for details on the domain decomposition",
2267 numNodes
- ddGridSetup
.numPmeOnlyRanks
, cellsizeLimit
, buf
);
2270 const real acs
= average_cellsize_min(ddbox
, ddGridSetup
.numDomains
);
2271 if (acs
< cellsizeLimit
)
2273 if (options
.numCells
[XX
] <= 0)
2277 "dd_choose_grid() should return a grid that satisfies the cell size limits");
2281 gmx_fatal_collective(
2282 FARGS
, communicator
, ddRole
== DDRole::Master
,
2283 "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2284 "options -dd, -rdd or -rcon, see the log file for details",
2285 acs
, cellsizeLimit
);
2289 const int numPPRanks
=
2290 ddGridSetup
.numDomains
[XX
] * ddGridSetup
.numDomains
[YY
] * ddGridSetup
.numDomains
[ZZ
];
2291 if (numNodes
- numPPRanks
!= ddGridSetup
.numPmeOnlyRanks
)
2293 gmx_fatal_collective(FARGS
, communicator
, ddRole
== DDRole::Master
,
2294 "The size of the domain decomposition grid (%d) does not match the "
2295 "number of PP ranks (%d). The total number of ranks is %d",
2296 numPPRanks
, numNodes
- ddGridSetup
.numPmeOnlyRanks
, numNodes
);
2298 if (ddGridSetup
.numPmeOnlyRanks
> numPPRanks
)
2300 gmx_fatal_collective(FARGS
, communicator
, ddRole
== DDRole::Master
,
2301 "The number of separate PME ranks (%d) is larger than the number of "
2302 "PP ranks (%d), this is not supported.",
2303 ddGridSetup
.numPmeOnlyRanks
, numPPRanks
);
2307 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2308 static DDRankSetup
getDDRankSetup(const gmx::MDLogger
& mdlog
,
2310 const DDGridSetup
& ddGridSetup
,
2311 const t_inputrec
& ir
)
2314 .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2315 ddGridSetup
.numDomains
[XX
], ddGridSetup
.numDomains
[YY
],
2316 ddGridSetup
.numDomains
[ZZ
], ddGridSetup
.numPmeOnlyRanks
);
2318 DDRankSetup ddRankSetup
;
2320 ddRankSetup
.numPPRanks
= numNodes
- ddGridSetup
.numPmeOnlyRanks
;
2321 copy_ivec(ddGridSetup
.numDomains
, ddRankSetup
.numPPCells
);
2323 ddRankSetup
.usePmeOnlyRanks
= (ddGridSetup
.numPmeOnlyRanks
> 0);
2324 if (ddRankSetup
.usePmeOnlyRanks
)
2326 ddRankSetup
.numRanksDoingPme
= ddGridSetup
.numPmeOnlyRanks
;
2330 ddRankSetup
.numRanksDoingPme
=
2331 ddGridSetup
.numDomains
[XX
] * ddGridSetup
.numDomains
[YY
] * ddGridSetup
.numDomains
[ZZ
];
2334 if (EEL_PME(ir
.coulombtype
) || EVDW_PME(ir
.vdwtype
))
2336 /* The following choices should match those
2337 * in comm_cost_est in domdec_setup.c.
2338 * Note that here the checks have to take into account
2339 * that the decomposition might occur in a different order than xyz
2340 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2341 * in which case they will not match those in comm_cost_est,
2342 * but since that is mainly for testing purposes that's fine.
2344 if (ddGridSetup
.numDDDimensions
>= 2 && ddGridSetup
.ddDimensions
[0] == XX
2345 && ddGridSetup
.ddDimensions
[1] == YY
2346 && ddRankSetup
.numRanksDoingPme
> ddGridSetup
.numDomains
[XX
]
2347 && ddRankSetup
.numRanksDoingPme
% ddGridSetup
.numDomains
[XX
] == 0
2348 && getenv("GMX_PMEONEDD") == nullptr)
2350 ddRankSetup
.npmedecompdim
= 2;
2351 ddRankSetup
.npmenodes_x
= ddGridSetup
.numDomains
[XX
];
2352 ddRankSetup
.npmenodes_y
= ddRankSetup
.numRanksDoingPme
/ ddRankSetup
.npmenodes_x
;
2356 /* In case nc is 1 in both x and y we could still choose to
2357 * decompose pme in y instead of x, but we use x for simplicity.
2359 ddRankSetup
.npmedecompdim
= 1;
2360 if (ddGridSetup
.ddDimensions
[0] == YY
)
2362 ddRankSetup
.npmenodes_x
= 1;
2363 ddRankSetup
.npmenodes_y
= ddRankSetup
.numRanksDoingPme
;
2367 ddRankSetup
.npmenodes_x
= ddRankSetup
.numRanksDoingPme
;
2368 ddRankSetup
.npmenodes_y
= 1;
2372 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2373 ddRankSetup
.npmenodes_x
, ddRankSetup
.npmenodes_y
, 1);
2377 ddRankSetup
.npmedecompdim
= 0;
2378 ddRankSetup
.npmenodes_x
= 0;
2379 ddRankSetup
.npmenodes_y
= 0;
2385 /*! \brief Set the cell size and interaction limits */
2386 static void set_dd_limits(const gmx::MDLogger
& mdlog
,
2389 const DomdecOptions
& options
,
2390 const DDSettings
& ddSettings
,
2391 const DDSystemInfo
& systemInfo
,
2392 const DDGridSetup
& ddGridSetup
,
2393 const int numPPRanks
,
2394 const gmx_mtop_t
* mtop
,
2395 const t_inputrec
* ir
,
2396 const gmx_ddbox_t
& ddbox
)
2398 gmx_domdec_comm_t
* comm
= dd
->comm
;
2399 comm
->ddSettings
= ddSettings
;
2401 /* Initialize to GPU share count to 0, might change later */
2402 comm
->nrank_gpu_shared
= 0;
2404 comm
->dlbState
= comm
->ddSettings
.initialDlbState
;
2405 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd
, TRUE
);
2406 /* To consider turning DLB on after 2*nstlist steps we need to check
2407 * at partitioning count 3. Thus we need to increase the first count by 2.
2409 comm
->ddPartioningCountFirstDlbOff
+= 2;
2411 comm
->bPMELoadBalDLBLimits
= FALSE
;
2413 /* Allocate the charge group/atom sorting struct */
2414 comm
->sort
= std::make_unique
<gmx_domdec_sort_t
>();
2416 comm
->systemInfo
= systemInfo
;
2418 if (systemInfo
.useUpdateGroups
)
2420 /* Note: We would like to use dd->nnodes for the atom count estimate,
2421 * but that is not yet available here. But this anyhow only
2422 * affect performance up to the second dd_partition_system call.
2424 const int homeAtomCountEstimate
= mtop
->natoms
/ numPPRanks
;
2425 comm
->updateGroupsCog
= std::make_unique
<gmx::UpdateGroupsCog
>(
2426 *mtop
, systemInfo
.updateGroupingPerMoleculetype
, maxReferenceTemperature(*ir
),
2427 homeAtomCountEstimate
);
2430 /* Set the DD setup given by ddGridSetup */
2431 copy_ivec(ddGridSetup
.numDomains
, dd
->numCells
);
2432 dd
->ndim
= ddGridSetup
.numDDDimensions
;
2433 copy_ivec(ddGridSetup
.ddDimensions
, dd
->dim
);
2435 dd
->nnodes
= dd
->numCells
[XX
] * dd
->numCells
[YY
] * dd
->numCells
[ZZ
];
2437 snew(comm
->slb_frac
, DIM
);
2438 if (isDlbDisabled(comm
))
2440 comm
->slb_frac
[XX
] = get_slb_frac(mdlog
, "x", dd
->numCells
[XX
], options
.cellSizeX
);
2441 comm
->slb_frac
[YY
] = get_slb_frac(mdlog
, "y", dd
->numCells
[YY
], options
.cellSizeY
);
2442 comm
->slb_frac
[ZZ
] = get_slb_frac(mdlog
, "z", dd
->numCells
[ZZ
], options
.cellSizeZ
);
2445 /* Set the multi-body cut-off and cellsize limit for DLB */
2446 comm
->cutoff_mbody
= systemInfo
.minCutoffForMultiBody
;
2447 comm
->cellsize_limit
= systemInfo
.cellsizeLimit
;
2448 if (systemInfo
.haveInterDomainBondeds
&& systemInfo
.increaseMultiBodyCutoff
)
2450 if (systemInfo
.filterBondedCommunication
|| !isDlbDisabled(comm
))
2452 /* Set the bonded communication distance to halfway
2453 * the minimum and the maximum,
2454 * since the extra communication cost is nearly zero.
2456 real acs
= average_cellsize_min(ddbox
, dd
->numCells
);
2457 comm
->cutoff_mbody
= 0.5 * (systemInfo
.minCutoffForMultiBody
+ acs
);
2458 if (!isDlbDisabled(comm
))
2460 /* Check if this does not limit the scaling */
2461 comm
->cutoff_mbody
= std::min(comm
->cutoff_mbody
, options
.dlbScaling
* acs
);
2463 if (!systemInfo
.filterBondedCommunication
)
2465 /* Without bBondComm do not go beyond the n.b. cut-off */
2466 comm
->cutoff_mbody
= std::min(comm
->cutoff_mbody
, systemInfo
.cutoff
);
2467 if (comm
->cellsize_limit
>= systemInfo
.cutoff
)
2469 /* We don't loose a lot of efficieny
2470 * when increasing it to the n.b. cut-off.
2471 * It can even be slightly faster, because we need
2472 * less checks for the communication setup.
2474 comm
->cutoff_mbody
= systemInfo
.cutoff
;
2477 /* Check if we did not end up below our original limit */
2478 comm
->cutoff_mbody
= std::max(comm
->cutoff_mbody
, systemInfo
.minCutoffForMultiBody
);
2480 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
2482 comm
->cellsize_limit
= comm
->cutoff_mbody
;
2485 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2491 "Bonded atom communication beyond the cut-off: %s\n"
2492 "cellsize limit %f\n",
2493 gmx::boolToString(systemInfo
.filterBondedCommunication
), comm
->cellsize_limit
);
2496 if (ddRole
== DDRole::Master
)
2498 check_dd_restrictions(dd
, ir
, mdlog
);
2502 void dd_init_bondeds(FILE* fplog
,
2504 const gmx_mtop_t
& mtop
,
2505 const gmx::VirtualSitesHandler
* vsite
,
2506 const t_inputrec
* ir
,
2508 gmx::ArrayRef
<cginfo_mb_t
> cginfo_mb
)
2510 gmx_domdec_comm_t
* comm
;
2512 dd_make_reverse_top(fplog
, dd
, &mtop
, vsite
, ir
, bBCheck
);
2516 if (comm
->systemInfo
.filterBondedCommunication
)
2518 /* Communicate atoms beyond the cut-off for bonded interactions */
2519 comm
->bondedLinks
= makeBondedLinks(mtop
, cginfo_mb
);
2523 /* Only communicate atoms based on cut-off */
2524 comm
->bondedLinks
= nullptr;
2528 static void writeSettings(gmx::TextWriter
* log
,
2530 const gmx_mtop_t
* mtop
,
2531 const t_inputrec
* ir
,
2532 gmx_bool bDynLoadBal
,
2534 const gmx_ddbox_t
* ddbox
)
2536 gmx_domdec_comm_t
* comm
;
2545 log
->writeString("The maximum number of communication pulses is:");
2546 for (d
= 0; d
< dd
->ndim
; d
++)
2548 log
->writeStringFormatted(" %c %d", dim2char(dd
->dim
[d
]), comm
->cd
[d
].np_dlb
);
2550 log
->ensureLineBreak();
2551 log
->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2552 comm
->cellsize_limit
);
2553 log
->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale
);
2554 log
->writeString("The allowed shrink of domain decomposition cells is:");
2555 for (d
= 0; d
< DIM
; d
++)
2557 if (dd
->numCells
[d
] > 1)
2559 if (d
>= ddbox
->npbcdim
&& dd
->numCells
[d
] == 2)
2565 shrink
= comm
->cellsize_min_dlb
[d
]
2566 / (ddbox
->box_size
[d
] * ddbox
->skew_fac
[d
] / dd
->numCells
[d
]);
2568 log
->writeStringFormatted(" %c %.2f", dim2char(d
), shrink
);
2571 log
->ensureLineBreak();
2575 set_dd_cell_sizes_slb(dd
, ddbox
, setcellsizeslbPULSE_ONLY
, np
);
2576 log
->writeString("The initial number of communication pulses is:");
2577 for (d
= 0; d
< dd
->ndim
; d
++)
2579 log
->writeStringFormatted(" %c %d", dim2char(dd
->dim
[d
]), np
[dd
->dim
[d
]]);
2581 log
->ensureLineBreak();
2582 log
->writeString("The initial domain decomposition cell size is:");
2583 for (d
= 0; d
< DIM
; d
++)
2585 if (dd
->numCells
[d
] > 1)
2587 log
->writeStringFormatted(" %c %.2f nm", dim2char(d
), dd
->comm
->cellsize_min
[d
]);
2590 log
->ensureLineBreak();
2594 const bool haveInterDomainVsites
=
2595 (countInterUpdategroupVsites(*mtop
, comm
->systemInfo
.updateGroupingPerMoleculetype
) != 0);
2597 if (comm
->systemInfo
.haveInterDomainBondeds
|| haveInterDomainVsites
2598 || comm
->systemInfo
.haveSplitConstraints
|| comm
->systemInfo
.haveSplitSettles
)
2600 std::string decompUnits
;
2601 if (comm
->systemInfo
.useUpdateGroups
)
2603 decompUnits
= "atom groups";
2607 decompUnits
= "atoms";
2610 log
->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2611 decompUnits
.c_str());
2612 log
->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "",
2613 comm
->systemInfo
.cutoff
);
2617 limit
= dd
->comm
->cellsize_limit
;
2621 if (dd
->unitCellInfo
.ddBoxIsDynamic
)
2624 "(the following are initial values, they could change due to box "
2627 limit
= dd
->comm
->cellsize_min
[XX
];
2628 for (d
= 1; d
< DIM
; d
++)
2630 limit
= std::min(limit
, dd
->comm
->cellsize_min
[d
]);
2634 if (comm
->systemInfo
.haveInterDomainBondeds
)
2636 log
->writeLineFormatted("%40s %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2637 std::max(comm
->systemInfo
.cutoff
, comm
->cutoff_mbody
));
2638 log
->writeLineFormatted("%40s %-7s %6.3f nm", "multi-body bonded interactions",
2640 (comm
->systemInfo
.filterBondedCommunication
|| isDlbOn(dd
->comm
))
2641 ? comm
->cutoff_mbody
2642 : std::min(comm
->systemInfo
.cutoff
, limit
));
2644 if (haveInterDomainVsites
)
2646 log
->writeLineFormatted("%40s %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit
);
2648 if (comm
->systemInfo
.haveSplitConstraints
|| comm
->systemInfo
.haveSplitSettles
)
2650 std::string separation
=
2651 gmx::formatString("atoms separated by up to %d constraints", 1 + ir
->nProjOrder
);
2652 log
->writeLineFormatted("%40s %-7s %6.3f nm\n", separation
.c_str(), "(-rcon)", limit
);
2654 log
->ensureLineBreak();
2658 static void logSettings(const gmx::MDLogger
& mdlog
,
2660 const gmx_mtop_t
* mtop
,
2661 const t_inputrec
* ir
,
2663 const gmx_ddbox_t
* ddbox
)
2665 gmx::StringOutputStream stream
;
2666 gmx::TextWriter
log(&stream
);
2667 writeSettings(&log
, dd
, mtop
, ir
, isDlbOn(dd
->comm
), dlb_scale
, ddbox
);
2668 if (dd
->comm
->dlbState
== DlbState::offCanTurnOn
)
2671 log
.ensureEmptyLine();
2673 "When dynamic load balancing gets turned on, these settings will change to:");
2675 writeSettings(&log
, dd
, mtop
, ir
, true, dlb_scale
, ddbox
);
2677 GMX_LOG(mdlog
.info
).asParagraph().appendText(stream
.toString());
2680 static void set_cell_limits_dlb(const gmx::MDLogger
& mdlog
,
2683 const t_inputrec
* ir
,
2684 const gmx_ddbox_t
* ddbox
)
2686 gmx_domdec_comm_t
* comm
;
2687 int d
, dim
, npulse
, npulse_d_max
, npulse_d
;
2692 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
2694 /* Determine the maximum number of comm. pulses in one dimension */
2696 comm
->cellsize_limit
= std::max(comm
->cellsize_limit
, comm
->cutoff_mbody
);
2698 /* Determine the maximum required number of grid pulses */
2699 if (comm
->cellsize_limit
>= comm
->systemInfo
.cutoff
)
2701 /* Only a single pulse is required */
2704 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
2706 /* We round down slightly here to avoid overhead due to the latency
2707 * of extra communication calls when the cut-off
2708 * would be only slightly longer than the cell size.
2709 * Later cellsize_limit is redetermined,
2710 * so we can not miss interactions due to this rounding.
2712 npulse
= static_cast<int>(0.96 + comm
->systemInfo
.cutoff
/ comm
->cellsize_limit
);
2716 /* There is no cell size limit */
2717 npulse
= std::max(dd
->numCells
[XX
] - 1, std::max(dd
->numCells
[YY
] - 1, dd
->numCells
[ZZ
] - 1));
2720 if (!bNoCutOff
&& npulse
> 1)
2722 /* See if we can do with less pulses, based on dlb_scale */
2724 for (d
= 0; d
< dd
->ndim
; d
++)
2727 npulse_d
= static_cast<int>(
2729 + dd
->numCells
[dim
] * comm
->systemInfo
.cutoff
2730 / (ddbox
->box_size
[dim
] * ddbox
->skew_fac
[dim
] * dlb_scale
));
2731 npulse_d_max
= std::max(npulse_d_max
, npulse_d
);
2733 npulse
= std::min(npulse
, npulse_d_max
);
2736 /* This env var can override npulse */
2737 d
= dd_getenv(mdlog
, "GMX_DD_NPULSE", 0);
2744 comm
->bVacDLBNoLimit
= (ir
->pbcType
== PbcType::No
);
2745 for (d
= 0; d
< dd
->ndim
; d
++)
2747 comm
->cd
[d
].np_dlb
= std::min(npulse
, dd
->numCells
[dd
->dim
[d
]] - 1);
2748 comm
->maxpulse
= std::max(comm
->maxpulse
, comm
->cd
[d
].np_dlb
);
2749 if (comm
->cd
[d
].np_dlb
< dd
->numCells
[dd
->dim
[d
]] - 1)
2751 comm
->bVacDLBNoLimit
= FALSE
;
2755 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2756 if (!comm
->bVacDLBNoLimit
)
2758 comm
->cellsize_limit
= std::max(comm
->cellsize_limit
, comm
->systemInfo
.cutoff
/ comm
->maxpulse
);
2760 comm
->cellsize_limit
= std::max(comm
->cellsize_limit
, comm
->cutoff_mbody
);
2761 /* Set the minimum cell size for each DD dimension */
2762 for (d
= 0; d
< dd
->ndim
; d
++)
2764 if (comm
->bVacDLBNoLimit
|| comm
->cd
[d
].np_dlb
* comm
->cellsize_limit
>= comm
->systemInfo
.cutoff
)
2766 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
2770 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->systemInfo
.cutoff
/ comm
->cd
[d
].np_dlb
;
2773 if (comm
->cutoff_mbody
<= 0)
2775 comm
->cutoff_mbody
= std::min(comm
->systemInfo
.cutoff
, comm
->cellsize_limit
);
2783 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t
& dd
)
2785 return dd
.comm
->systemInfo
.moleculesAreAlwaysWhole
;
2788 gmx_bool
dd_bonded_molpbc(const gmx_domdec_t
* dd
, PbcType pbcType
)
2790 /* If each molecule is a single charge group
2791 * or we use domain decomposition for each periodic dimension,
2792 * we do not need to take pbc into account for the bonded interactions.
2794 return (pbcType
!= PbcType::No
&& dd
->comm
->systemInfo
.haveInterDomainBondeds
2795 && !(dd
->numCells
[XX
] > 1 && dd
->numCells
[YY
] > 1
2796 && (dd
->numCells
[ZZ
] > 1 || pbcType
== PbcType::XY
)));
2799 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2800 static void set_ddgrid_parameters(const gmx::MDLogger
& mdlog
,
2803 const gmx_mtop_t
* mtop
,
2804 const t_inputrec
* ir
,
2805 const gmx_ddbox_t
* ddbox
)
2807 gmx_domdec_comm_t
* comm
= dd
->comm
;
2808 DDRankSetup
& ddRankSetup
= comm
->ddRankSetup
;
2810 if (EEL_PME(ir
->coulombtype
) || EVDW_PME(ir
->vdwtype
))
2812 init_ddpme(dd
, &ddRankSetup
.ddpme
[0], 0);
2813 if (ddRankSetup
.npmedecompdim
>= 2)
2815 init_ddpme(dd
, &ddRankSetup
.ddpme
[1], 1);
2820 ddRankSetup
.numRanksDoingPme
= 0;
2821 if (dd
->pme_nodeid
>= 0)
2823 gmx_fatal_collective(FARGS
, dd
->mpi_comm_all
, DDMASTER(dd
),
2824 "Can not have separate PME ranks without PME electrostatics");
2830 fprintf(debug
, "The DD cut-off is %f\n", comm
->systemInfo
.cutoff
);
2832 if (!isDlbDisabled(comm
))
2834 set_cell_limits_dlb(mdlog
, dd
, dlb_scale
, ir
, ddbox
);
2837 logSettings(mdlog
, dd
, mtop
, ir
, dlb_scale
, ddbox
);
2840 if (ir
->pbcType
== PbcType::No
)
2842 vol_frac
= 1 - 1 / static_cast<double>(dd
->nnodes
);
2846 vol_frac
= (1 + comm_box_frac(dd
->numCells
, comm
->systemInfo
.cutoff
, *ddbox
))
2847 / static_cast<double>(dd
->nnodes
);
2851 fprintf(debug
, "Volume fraction for all DD zones: %f\n", vol_frac
);
2853 int natoms_tot
= mtop
->natoms
;
2855 dd
->ga2la
= new gmx_ga2la_t(natoms_tot
, static_cast<int>(vol_frac
* natoms_tot
));
2858 /*! \brief Get some important DD parameters which can be modified by env.vars */
2859 static DDSettings
getDDSettings(const gmx::MDLogger
& mdlog
,
2860 const DomdecOptions
& options
,
2861 const gmx::MdrunOptions
& mdrunOptions
,
2862 const t_inputrec
& ir
)
2864 DDSettings ddSettings
;
2866 ddSettings
.useSendRecv2
= (dd_getenv(mdlog
, "GMX_DD_USE_SENDRECV2", 0) != 0);
2867 ddSettings
.dlb_scale_lim
= dd_getenv(mdlog
, "GMX_DLB_MAX_BOX_SCALING", 10);
2868 ddSettings
.useDDOrderZYX
= bool(dd_getenv(mdlog
, "GMX_DD_ORDER_ZYX", 0));
2869 ddSettings
.useCartesianReorder
= bool(dd_getenv(mdlog
, "GMX_NO_CART_REORDER", 1));
2870 ddSettings
.eFlop
= dd_getenv(mdlog
, "GMX_DLB_BASED_ON_FLOPS", 0);
2871 const int recload
= dd_getenv(mdlog
, "GMX_DD_RECORD_LOAD", 1);
2872 ddSettings
.nstDDDump
= dd_getenv(mdlog
, "GMX_DD_NST_DUMP", 0);
2873 ddSettings
.nstDDDumpGrid
= dd_getenv(mdlog
, "GMX_DD_NST_DUMP_GRID", 0);
2874 ddSettings
.DD_debug
= dd_getenv(mdlog
, "GMX_DD_DEBUG", 0);
2876 if (ddSettings
.useSendRecv2
)
2880 "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2881 "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2885 if (ddSettings
.eFlop
)
2887 GMX_LOG(mdlog
.info
).appendText("Will load balance based on FLOP count");
2888 ddSettings
.recordLoad
= true;
2892 ddSettings
.recordLoad
= (wallcycle_have_counter() && recload
> 0);
2895 ddSettings
.initialDlbState
= determineInitialDlbState(mdlog
, options
.dlbOption
,
2896 ddSettings
.recordLoad
, mdrunOptions
, &ir
);
2898 .appendTextFormatted("Dynamic load balancing: %s",
2899 edlbs_names
[static_cast<int>(ddSettings
.initialDlbState
)]);
2904 gmx_domdec_t::gmx_domdec_t(const t_inputrec
& ir
) : unitCellInfo(ir
) {}
2909 // TODO once the functionality stablizes, move this class and
2910 // supporting functionality into builder.cpp
2911 /*! \brief Impl class for DD builder */
2912 class DomainDecompositionBuilder::Impl
2916 Impl(const MDLogger
& mdlog
,
2918 const DomdecOptions
& options
,
2919 const MdrunOptions
& mdrunOptions
,
2920 const gmx_mtop_t
& mtop
,
2921 const t_inputrec
& ir
,
2923 ArrayRef
<const RVec
> xGlobal
);
2925 //! Build the resulting DD manager
2926 gmx_domdec_t
* build(LocalAtomSetManager
* atomSets
);
2928 //! Objects used in constructing and configuring DD
2931 const MDLogger
& mdlog_
;
2932 //! Communication object
2934 //! User-supplied options configuring DD behavior
2935 const DomdecOptions options_
;
2936 //! Global system topology
2937 const gmx_mtop_t
& mtop_
;
2938 //! User input values from the tpr file
2939 const t_inputrec
& ir_
;
2942 //! Internal objects used in constructing DD
2944 //! Settings combined from the user input
2945 DDSettings ddSettings_
;
2946 //! Information derived from the simulation system
2947 DDSystemInfo systemInfo_
;
2949 gmx_ddbox_t ddbox_
= { 0 };
2950 //! Organization of the DD grids
2951 DDGridSetup ddGridSetup_
;
2952 //! Organzation of the DD ranks
2953 DDRankSetup ddRankSetup_
;
2954 //! Number of DD cells in each dimension
2955 ivec ddCellIndex_
= { 0, 0, 0 };
2956 //! IDs of PME-only ranks
2957 std::vector
<int> pmeRanks_
;
2958 //! Contains a valid Cartesian-communicator-based setup, or defaults.
2959 CartesianRankSetup cartSetup_
;
2963 DomainDecompositionBuilder::Impl::Impl(const MDLogger
& mdlog
,
2965 const DomdecOptions
& options
,
2966 const MdrunOptions
& mdrunOptions
,
2967 const gmx_mtop_t
& mtop
,
2968 const t_inputrec
& ir
,
2970 ArrayRef
<const RVec
> xGlobal
) :
2977 GMX_LOG(mdlog_
.info
).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_
->sizeOfDefaultCommunicator
);
2979 ddSettings_
= getDDSettings(mdlog_
, options_
, mdrunOptions
, ir_
);
2981 if (ddSettings_
.eFlop
> 1)
2983 /* Ensure that we have different random flop counts on different ranks */
2984 srand(1 + cr_
->rankInDefaultCommunicator
);
2987 systemInfo_
= getSystemInfo(mdlog_
, MASTER(cr_
) ? DDRole::Master
: DDRole::Agent
,
2988 cr
->mpiDefaultCommunicator
, options_
, mtop_
, ir_
, box
, xGlobal
);
2990 const int numRanksRequested
= cr_
->sizeOfDefaultCommunicator
;
2991 const bool checkForLargePrimeFactors
= (options_
.numCells
[0] <= 0);
2992 checkForValidRankCountRequests(numRanksRequested
, EEL_PME(ir_
.coulombtype
),
2993 options_
.numPmeRanks
, checkForLargePrimeFactors
);
2995 // DD grid setup uses a more different cell size limit for
2996 // automated setup than the one in systemInfo_. The latter is used
2997 // in set_dd_limits() to configure DLB, for example.
2998 const real gridSetupCellsizeLimit
=
2999 getDDGridSetupCellSizeLimit(mdlog_
, !isDlbDisabled(ddSettings_
.initialDlbState
),
3000 options_
.dlbScaling
, ir_
, systemInfo_
.cellsizeLimit
);
3002 getDDGridSetup(mdlog_
, MASTER(cr_
) ? DDRole::Master
: DDRole::Agent
,
3003 cr
->mpiDefaultCommunicator
, numRanksRequested
, options_
, ddSettings_
,
3004 systemInfo_
, gridSetupCellsizeLimit
, mtop_
, ir_
, box
, xGlobal
, &ddbox_
);
3005 checkDDGridSetup(ddGridSetup_
, MASTER(cr_
) ? DDRole::Master
: DDRole::Agent
,
3006 cr
->mpiDefaultCommunicator
, cr
->sizeOfDefaultCommunicator
, options_
,
3007 ddSettings_
, systemInfo_
, gridSetupCellsizeLimit
, ddbox_
);
3009 cr_
->npmenodes
= ddGridSetup_
.numPmeOnlyRanks
;
3011 ddRankSetup_
= getDDRankSetup(mdlog_
, cr_
->sizeOfDefaultCommunicator
, ddGridSetup_
, ir_
);
3013 /* Generate the group communicator, also decides the duty of each rank */
3014 cartSetup_
= makeGroupCommunicators(mdlog_
, ddSettings_
, options_
.rankOrder
, ddRankSetup_
, cr_
,
3015 ddCellIndex_
, &pmeRanks_
);
3018 gmx_domdec_t
* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager
* atomSets
)
3020 gmx_domdec_t
* dd
= new gmx_domdec_t(ir_
);
3022 copy_ivec(ddCellIndex_
, dd
->ci
);
3024 dd
->comm
= init_dd_comm();
3026 dd
->comm
->ddRankSetup
= ddRankSetup_
;
3027 dd
->comm
->cartesianRankSetup
= cartSetup_
;
3029 set_dd_limits(mdlog_
, MASTER(cr_
) ? DDRole::Master
: DDRole::Agent
, dd
, options_
, ddSettings_
,
3030 systemInfo_
, ddGridSetup_
, ddRankSetup_
.numPPRanks
, &mtop_
, &ir_
, ddbox_
);
3032 setupGroupCommunication(mdlog_
, ddSettings_
, pmeRanks_
, cr_
, mtop_
.natoms
, dd
);
3034 if (thisRankHasDuty(cr_
, DUTY_PP
))
3036 set_ddgrid_parameters(mdlog_
, dd
, options_
.dlbScaling
, &mtop_
, &ir_
, &ddbox_
);
3038 setup_neighbor_relations(dd
);
3041 /* Set overallocation to avoid frequent reallocation of arrays */
3042 set_over_alloc_dd(TRUE
);
3044 dd
->atomSets
= atomSets
;
3049 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger
& mdlog
,
3051 const DomdecOptions
& options
,
3052 const MdrunOptions
& mdrunOptions
,
3053 const gmx_mtop_t
& mtop
,
3054 const t_inputrec
& ir
,
3056 ArrayRef
<const RVec
> xGlobal
) :
3057 impl_(new Impl(mdlog
, cr
, options
, mdrunOptions
, mtop
, ir
, box
, xGlobal
))
3061 gmx_domdec_t
* DomainDecompositionBuilder::build(LocalAtomSetManager
* atomSets
)
3063 return impl_
->build(atomSets
);
3066 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3070 static gmx_bool
test_dd_cutoff(const t_commrec
* cr
, const matrix box
, gmx::ArrayRef
<const gmx::RVec
> x
, real cutoffRequested
)
3077 const auto* dd
= cr
->dd
;
3079 set_ddbox(*dd
, false, box
, true, x
, &ddbox
);
3083 for (d
= 0; d
< dd
->ndim
; d
++)
3087 inv_cell_size
= DD_CELL_MARGIN
* dd
->numCells
[dim
] / ddbox
.box_size
[dim
];
3088 if (dd
->unitCellInfo
.ddBoxIsDynamic
)
3090 inv_cell_size
*= DD_PRES_SCALE_MARGIN
;
3093 np
= 1 + static_cast<int>(cutoffRequested
* inv_cell_size
* ddbox
.skew_fac
[dim
]);
3095 if (!isDlbDisabled(dd
->comm
) && (dim
< ddbox
.npbcdim
) && (dd
->comm
->cd
[d
].np_dlb
> 0))
3097 if (np
> dd
->comm
->cd
[d
].np_dlb
)
3102 /* If a current local cell size is smaller than the requested
3103 * cut-off, we could still fix it, but this gets very complicated.
3104 * Without fixing here, we might actually need more checks.
3106 real cellSizeAlongDim
=
3107 (dd
->comm
->cell_x1
[dim
] - dd
->comm
->cell_x0
[dim
]) * ddbox
.skew_fac
[dim
];
3108 if (cellSizeAlongDim
* dd
->comm
->cd
[d
].np_dlb
< cutoffRequested
)
3115 if (!isDlbDisabled(dd
->comm
))
3117 /* If DLB is not active yet, we don't need to check the grid jumps.
3118 * Actually we shouldn't, because then the grid jump data is not set.
3120 if (isDlbOn(dd
->comm
) && check_grid_jump(0, dd
, cutoffRequested
, &ddbox
, FALSE
))
3125 gmx_sumi(1, &LocallyLimited
, cr
);
3127 if (LocallyLimited
> 0)
3136 gmx_bool
change_dd_cutoff(t_commrec
* cr
, const matrix box
, gmx::ArrayRef
<const gmx::RVec
> x
, real cutoffRequested
)
3138 gmx_bool bCutoffAllowed
;
3140 bCutoffAllowed
= test_dd_cutoff(cr
, box
, x
, cutoffRequested
);
3144 cr
->dd
->comm
->systemInfo
.cutoff
= cutoffRequested
;
3147 return bCutoffAllowed
;
3150 void constructGpuHaloExchange(const gmx::MDLogger
& mdlog
,
3151 const t_commrec
& cr
,
3152 const gmx::DeviceStreamManager
& deviceStreamManager
,
3153 gmx_wallcycle
* wcycle
)
3155 GMX_RELEASE_ASSERT(deviceStreamManager
.streamIsValid(gmx::DeviceStreamType::NonBondedLocal
),
3156 "Local non-bonded stream should be valid when using"
3157 "GPU halo exchange.");
3158 GMX_RELEASE_ASSERT(deviceStreamManager
.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal
),
3159 "Non-local non-bonded stream should be valid when using "
3160 "GPU halo exchange.");
3162 if (cr
.dd
->gpuHaloExchange
[0].empty())
3164 GMX_LOG(mdlog
.warning
)
3166 .appendTextFormatted(
3167 "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3169 "GMX_GPU_DD_COMMS environment variable.");
3172 for (int d
= 0; d
< cr
.dd
->ndim
; d
++)
3174 for (int pulse
= cr
.dd
->gpuHaloExchange
[d
].size(); pulse
< cr
.dd
->comm
->cd
[d
].numPulses(); pulse
++)
3176 cr
.dd
->gpuHaloExchange
[d
].push_back(std::make_unique
<gmx::GpuHaloExchange
>(
3177 cr
.dd
, d
, cr
.mpi_comm_mysim
, deviceStreamManager
.context(),
3178 deviceStreamManager
.stream(gmx::DeviceStreamType::NonBondedLocal
),
3179 deviceStreamManager
.stream(gmx::DeviceStreamType::NonBondedNonLocal
), pulse
, wcycle
));
3184 void reinitGpuHaloExchange(const t_commrec
& cr
,
3185 const DeviceBuffer
<gmx::RVec
> d_coordinatesBuffer
,
3186 const DeviceBuffer
<gmx::RVec
> d_forcesBuffer
)
3188 for (int d
= 0; d
< cr
.dd
->ndim
; d
++)
3190 for (int pulse
= 0; pulse
< cr
.dd
->comm
->cd
[d
].numPulses(); pulse
++)
3192 cr
.dd
->gpuHaloExchange
[d
][pulse
]->reinitHalo(d_coordinatesBuffer
, d_forcesBuffer
);
3197 void communicateGpuHaloCoordinates(const t_commrec
& cr
,
3199 GpuEventSynchronizer
* coordinatesReadyOnDeviceEvent
)
3201 for (int d
= 0; d
< cr
.dd
->ndim
; d
++)
3203 for (int pulse
= 0; pulse
< cr
.dd
->comm
->cd
[d
].numPulses(); pulse
++)
3205 cr
.dd
->gpuHaloExchange
[d
][pulse
]->communicateHaloCoordinates(box
, coordinatesReadyOnDeviceEvent
);
3210 void communicateGpuHaloForces(const t_commrec
& cr
, bool accumulateForces
)
3212 for (int d
= cr
.dd
->ndim
- 1; d
>= 0; d
--)
3214 for (int pulse
= cr
.dd
->comm
->cd
[d
].numPulses() - 1; pulse
>= 0; pulse
--)
3216 cr
.dd
->gpuHaloExchange
[d
][pulse
]->communicateHaloForces(accumulateForces
);