/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008,2009,2010,2011,2012 by the GROMACS development team.
 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

/*! \internal \file
 *
 * \brief This file defines functions used by the domdec module
 * in its initial setup phase.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include "domdec_setup.h"

#include <cassert>
#include <cmath>
#include <cstdio>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/options.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/perf_est.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"

#include "box.h"
#include "domdec_internal.h"

// TODO remove this when moving domdec into gmx namespace
using gmx::DomdecOptions;

/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05

/*! \brief Factorize \p n.
 *
 * \param[in]  n    Value to factorize
 * \param[out] fac  Vector of factors (to be allocated in this function)
 * \param[out] mfac Vector with the number of times each factor repeats in the factorization (to be allocated in this function)
 */
static void factorize(int n, std::vector<int>* fac, std::vector<int>* mfac)
{
    if (n <= 0)
    {
        gmx_fatal(FARGS, "Can only factorize positive integers.");
    }

    /* Decompose n in factors */
    fac->clear();
    mfac->clear();
    int d = 2;
    while (n > 1)
    {
        while (n % d == 0)
        {
            if (fac->empty() || fac->back() != d)
            {
                fac->push_back(d);
                mfac->push_back(1);
            }
            else
            {
                mfac->back()++;
            }
            n /= d;
        }
        d++;
    }
}
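
/* Example: factorize(60, &fac, &mfac) yields fac = { 2, 3, 5 } and
 * mfac = { 2, 1, 1 }, since 60 = 2^2 * 3^1 * 5^1.
 */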

/*! \brief Find the largest prime factor of \p n */
static int largest_divisor(int n)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(n, &div, &mdiv);

    return div.back();
}
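
/* Example: largest_divisor(26) returns 13, the largest prime factor,
 * which is what the rank-count sanity check in
 * checkForValidRankCountRequests() below relies on.
 */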

/*! \brief Compute the largest common divisor of \p n1 and \p n2 */
static int lcd(int n1, int n2)
{
    int d, i;

    d = 1;
    for (i = 2; (i <= n1 && i <= n2); i++)
    {
        if (n1 % i == 0 && n2 % i == 0)
        {
            d = i;
        }
    }

    return d;
}
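
/* Example: lcd(12, 18) returns 6; for coprime inputs such as lcd(8, 5)
 * it returns 1.
 */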

/*! \brief Returns TRUE when there are enough PME ranks for the ratio */
static gmx_bool fits_pme_ratio(int nrank_tot, int nrank_pme, float ratio)
{
    return (static_cast<double>(nrank_pme) / static_cast<double>(nrank_tot) > 0.95 * ratio);
}
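
/* Example: with a relative PME load of 0.25 on 16 total ranks, 4 PME ranks
 * fit, since 4/16 = 0.25 > 0.95 * 0.25 = 0.2375, while 3 PME ranks
 * (3/16 = 0.1875) do not.
 */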

/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(ntot - npme, &div, &mdiv);

    int npp_root3  = gmx::roundToInt(std::cbrt(ntot - npme));
    int npme_root2 = gmx::roundToInt(std::sqrt(static_cast<double>(npme)));

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (div.back() > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME ranks have a reasonably sized
     * common divisor, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme) * 2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}
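
/* Example of the prime factor check above: with 49 PP ranks, cbrt(49)
 * rounds to 4, so prime factors up to 3 + 4 = 7 are accepted and
 * 49 = 7 * 7 passes; with 26 PP ranks, cbrt(26) rounds to 3 and the
 * prime factor 13 of 26 is rejected.
 */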

/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(const gmx::MDLogger& mdlog,
                      const gmx_mtop_t&    mtop,
                      const t_inputrec&    ir,
                      const matrix         box,
                      int                  nrank_tot)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    GMX_LOG(mdlog.info).appendTextFormatted("Guess for relative PME load: %.2f", ratio);

    /* We assume the optimal rank ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nrank_tot, nrank_tot / 2, ratio))
    {
        /* We would need more than nrank_tot/2 PME-only ranks,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all ranks do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nrank_tot up to nrank_tot/3.
     * We start with a minimum PME node fraction of 1/16
     * and avoid ratios which lead to large prime factors in nnodes-npme.
     */
    npme = (nrank_tot + 15) / 16;
    while (npme <= nrank_tot / 3)
    {
        if (nrank_tot % npme == 0)
        {
            /* Note that fits_perf might change the PME grid,
             * in the current implementation it does not.
             */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
        npme++;
    }

    if (npme > nrank_tot / 3)
    {
        /* Try any possible number for npme */
        npme = 1;
        while (npme <= nrank_tot / 2)
        {
            /* Note that fits_perf may change the PME grid */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
            npme++;
        }
    }

    if (npme > nrank_tot / 2)
    {
        gmx_fatal(FARGS,
                  "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks "
                  "(%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, "
                  "grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid "
                  "dimensions, see the manual for details.",
                  ratio, gmx::roundToInt(0.95 * ratio * nrank_tot), nrank_tot / 2, ir.nkx, ir.nky);
    }
    else
    {
        GMX_LOG(mdlog.info)
                .appendTextFormatted(
                        "Will use %d particle-particle and %d PME only ranks\n"
                        "This is a guess, check the performance at the end of the log file",
                        nrank_tot - npme, npme);
    }

    return npme;
}
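
/* Example of the search: for nrank_tot = 32 and a PME load ratio of 0.25,
 * the scan starts at npme = (32 + 15) / 16 = 2 and tests the divisors of 32
 * up to 32/3; npme = 2 and 4 fail fits_pme_ratio(), so the first accepted
 * value is npme = 8, since 8/32 = 0.25 > 0.95 * 0.25.
 */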

/*! \brief Return \p n divided by \p f rounded up to the next integer. */
static int div_up(int n, int f)
{
    return (n + f - 1) / f;
}
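
/* Example: div_up(7, 3) returns 3, while div_up(6, 3) returns 2. */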

real comm_box_frac(const gmx::IVec& dd_nc, real cutoff, const gmx_ddbox_t& ddbox)
{
    int  i, j, k;
    rvec nw;
    real comm_vol;

    for (i = 0; i < DIM; i++)
    {
        real bt = ddbox.box_size[i] * ddbox.skew_fac[i];
        nw[i]   = dd_nc[i] * cutoff / bt;
    }

    comm_vol = 0;
    for (i = 0; i < DIM; i++)
    {
        if (dd_nc[i] > 1)
        {
            comm_vol += nw[i];
            for (j = i + 1; j < DIM; j++)
            {
                if (dd_nc[j] > 1)
                {
                    comm_vol += nw[i] * nw[j] * M_PI / 4;
                    for (k = j + 1; k < DIM; k++)
                    {
                        if (dd_nc[k] > 1)
                        {
                            comm_vol += nw[i] * nw[j] * nw[k] * M_PI / 6;
                        }
                    }
                }
            }
        }
    }

    return comm_vol;
}
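
/* Worked example: for nw = { 0.2, 0.2, 0.2 } with more than one cell in
 * every dimension, the communicated volume fraction is
 *   3 * 0.2 + 3 * 0.2^2 * M_PI / 4 + 0.2^3 * M_PI / 6 ~= 0.70,
 * i.e. the sum of slab, edge-cylinder and corner-sphere contributions.
 */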

/*! \brief Return whether the DD is inhomogeneous in the z direction */
static gmx_bool inhomogeneous_z(const t_inputrec& ir)
{
    return ((EEL_PME(ir.coulombtype) || ir.coulombtype == eelEWALD) && ir.ePBC == epbcXYZ
            && ir.ewald_geometry == eewg3DC);
}

/*! \brief Estimate cost of PME FFT communication
 *
 * This only takes the communication into account and not imbalance
 * in the calculation. But the imbalance in communication and calculation
 * are similar and therefore these formulas also prefer load balance
 * in the FFT and pme_solve calculation.
 */
static float comm_pme_cost_vol(int npme, int a, int b, int c)
{
    /* We use a float here, since an integer might overflow */
    float comm_vol;

    comm_vol = npme - 1;
    comm_vol *= npme;
    comm_vol *= div_up(a, npme);
    comm_vol *= div_up(b, npme);
    comm_vol *= c;

    return comm_vol;
}
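
/* Example: for npme = 4 ranks and a 64x64x64 PME grid decomposed along a,
 * comm_vol = (4 - 1) * 4 * div_up(64, 4) * div_up(64, 4) * 64
 *          = 3 * 4 * 16 * 16 * 64.
 */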

/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real               limit,
                           real               cutoff,
                           const matrix       box,
                           const gmx_ddbox_t& ddbox,
                           int                natoms,
                           const t_inputrec&  ir,
                           float              pbcdxr,
                           int                npme_tot,
                           const gmx::IVec&   nc)
{
    gmx::IVec npme = { 1, 1, 1 };
    int       i, j, nk, overlap;
    rvec      bt;
    float     comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
    /* This is the cost of a pbc_dx call relative to the cost
     * of communicating the coordinate and force of an atom.
     * This will be machine dependent.
     * These factors are for x86 with SMP or Infiniband.
     */
    float pbcdx_rect_fac = 0.1;
    float pbcdx_tric_fac = 0.2;
    float temp;

    /* Check the DD algorithm restrictions */
    if ((ir.ePBC == epbcXY && ir.nwall < 2 && nc[ZZ] > 1)
        || (ir.ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox.npbcdim <= DIM);

    /* Check if the triclinic requirements are met */
    for (i = 0; i < DIM; i++)
    {
        for (j = i + 1; j < ddbox.npbcdim; j++)
        {
            if (box[j][i] != 0 || ir.deform[j][i] != 0 || (ir.epc != epcNO && ir.compress[j][i] != 0))
            {
                if (nc[j] > 1 && nc[i] == 1)
                {
                    return -1;
                }
            }
        }
    }

    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox.box_size[i] * ddbox.skew_fac[i];

        /* Without PBC and with 2 cells, there are no lower limits on the cell size */
        if (!(i >= ddbox.npbcdim && nc[i] <= 2) && bt[i] < nc[i] * limit)
        {
            return -1;
        }

        /* With PBC, check if the cut-off fits in nc[i]-1 cells */
        if (i < ddbox.npbcdim && nc[i] > 1 && (nc[i] - 1) * bt[i] < nc[i] * cutoff)
        {
            return -1;
        }
    }

    if (npme_tot > 1)
    {
        /* The following choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[XX] == 1 && nc[YY] > 1)
        {
            npme[XX] = 1;
            npme[YY] = npme_tot;
        }
        else if (nc[YY] == 1)
        {
            npme[XX] = npme_tot;
            npme[YY] = 1;
        }
        else
        {
            /* Will we use 1D or 2D PME decomposition? */
            npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot;
            npme[YY] = npme_tot / npme[XX];
        }
    }

    if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
    {
        /* Check the PME grid restrictions.
         * Currently these can only be invalid here with too few grid lines
         * along the x dimension per rank doing PME.
         */
        int npme_x = (npme_tot > 1 ? npme[XX] : nc[XX]);

        /* Currently we don't have the OpenMP thread count available here.
         * But with threads we have only tighter restrictions and it's
         * probably better anyhow to avoid settings where we need to reduce
         * grid lines over multiple ranks, as the thread check will do.
         */
        bool useThreads     = true;
        bool errorsAreFatal = false;
        if (!gmx_pme_check_restrictions(ir.pme_order, ir.nkx, ir.nky, ir.nkz, npme_x, useThreads,
                                        errorsAreFatal))
        {
            return -1;
        }
    }

    /* When two dimensions are (nearly) equal, use more cells
     * for the smallest index, so the decomposition does not
     * depend sensitively on the rounding of the box elements.
     */
    for (i = 0; i < DIM; i++)
    {
        for (j = i + 1; j < DIM; j++)
        {
            /* Check if the box size is nearly identical,
             * in that case we prefer nx > ny and ny > nz.
             */
            if (std::fabs(bt[j] - bt[i]) < 0.01 * bt[i] && nc[j] > nc[i])
            {
                /* The XX/YY check is a bit compact. If nc[YY]==npme[YY]
                 * this means the swapped nc has nc[XX]==npme[XX],
                 * and we can also swap X and Y for PME.
                 */
                /* Check if dimension i and j are equivalent for PME.
                 * For x/y: if nc[YY]!=npme[YY], we cannot swap x/y.
                 * For y/z: we cannot have PME decomposition in z.
                 */
                if (npme_tot <= 1
                    || !((i == XX && j == YY && nc[YY] != npme[YY]) || (i == YY && j == ZZ && npme[YY] > 1)))
                {
                    return -1;
                }
            }
        }
    }

    /* This function determines only half of the communication cost.
     * All PP, PME and PP-PME communication is symmetric
     * and the "back"-communication cost is identical to the forward cost.
     */

    comm_vol = comm_box_frac(nc, cutoff, ddbox);

    comm_pme = 0;
    for (i = 0; i < 2; i++)
    {
        /* Determine the largest volume for PME x/f redistribution */
        if (nc[i] % npme[i] != 0)
        {
            if (nc[i] > npme[i])
            {
                comm_vol_xf = (npme[i] == 2 ? 1.0 / 3.0 : 0.5);
            }
            else
            {
                comm_vol_xf = 1.0 - lcd(nc[i], npme[i]) / static_cast<double>(npme[i]);
            }
            comm_pme += 3 * natoms * comm_vol_xf;
        }

        /* Grid overlap communication */
        if (npme[i] > 1)
        {
            nk      = (i == 0 ? ir.nkx : ir.nky);
            overlap = (nk % npme[i] == 0 ? ir.pme_order - 1 : ir.pme_order);
            temp    = npme[i];
            temp *= overlap;
            temp *= ir.nkx;
            temp *= ir.nky;
            temp *= ir.nkz;
            temp /= nk;
            comm_pme += temp;
            /* Old line comm_pme += npme[i]*overlap*ir.nkx*ir.nky*ir.nkz/nk; */
        }
    }

    comm_pme += comm_pme_cost_vol(npme[YY], ir.nky, ir.nkz, ir.nkx);
    comm_pme += comm_pme_cost_vol(npme[XX], ir.nkx, ir.nky, ir.nkz);

    /* Add cost of pbc_dx for bondeds */
    cost_pbcdx = 0;
    if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir.ePBC != epbcXY))
    {
        if ((ddbox.tric_dir[XX] && nc[XX] == 1) || (ddbox.tric_dir[YY] && nc[YY] == 1))
        {
            cost_pbcdx = pbcdxr * pbcdx_tric_fac;
        }
        else
        {
            cost_pbcdx = pbcdxr * pbcdx_rect_fac;
        }
    }

    if (debug)
    {
        fprintf(debug, "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
                nc[XX], nc[YY], nc[ZZ], npme[XX], npme[YY], comm_vol, cost_pbcdx,
                comm_pme / (3 * natoms), comm_vol + cost_pbcdx + comm_pme / (3 * natoms));
    }

    return 3 * natoms * (comm_vol + cost_pbcdx) + comm_pme;
}
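
/* Worked example of the returned estimate: with comm_vol = 0.20,
 * cost_pbcdx = 0.02 and comm_pme = 3.0e4 for natoms = 1.0e5, the cost is
 * 3 * 1e5 * (0.20 + 0.02) + 3e4 = 9.6e4, in units of one atom's x/f
 * communication.
 */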

/*! \brief Assign penalty factors to possible domain decompositions,
 * based on the estimated communication costs. */
static void assign_factors(const real         limit,
                           const bool         request1D,
                           const real         cutoff,
                           const matrix       box,
                           const gmx_ddbox_t& ddbox,
                           int                natoms,
                           const t_inputrec&  ir,
                           float              pbcdxr,
                           int                npme,
                           int                ndiv,
                           const int*         div,
                           const int*         mdiv,
                           gmx::IVec*         irTryPtr,
                           gmx::IVec*         opt)
{
    int        x, y, i;
    float      ce;
    gmx::IVec& ir_try = *irTryPtr;

    if (ndiv == 0)
    {
        const int maxDimensionSize = std::max(ir_try[XX], std::max(ir_try[YY], ir_try[ZZ]));
        const int productOfDimensionSizes = ir_try[XX] * ir_try[YY] * ir_try[ZZ];
        const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
        if (request1D && !decompositionHasOneDimension)
        {
            /* We requested 1D DD, but got multiple dimensions */
            return;
        }

        ce = comm_cost_est(limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme, ir_try);
        if (ce >= 0
            && ((*opt)[XX] == 0
                || ce < comm_cost_est(limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme, *opt)))
        {
            *opt = ir_try;
        }

        return;
    }

    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0] - x; y >= 0; y--)
        {
            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < mdiv[0] - x - y; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            /* recurse */
            assign_factors(limit, request1D, cutoff, box, ddbox, natoms, ir, pbcdxr, npme, ndiv - 1,
                           div + 1, mdiv + 1, irTryPtr, opt);

            for (i = 0; i < mdiv[0] - x - y; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}
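
/* Example of the enumeration: for 12 PP ranks, factorize() gives
 * div = { 2, 3 } and mdiv = { 2, 1 }, and the recursion tries every way of
 * distributing 2^2 * 3 over x, y and z: { 12, 1, 1 }, { 6, 2, 1 },
 * { 4, 3, 1 }, { 2, 2, 3 }, ..., keeping the grid with the lowest
 * comm_cost_est() in *opt.
 */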

/*! \brief Determine the optimal distribution of DD cells for the
 * simulation system and number of MPI ranks
 *
 * \returns The optimal grid cell choice, which contains all zeros
 *          if no valid cell choice exists. */
static gmx::IVec optimizeDDCells(const gmx::MDLogger& mdlog,
                                 const int            numRanksRequested,
                                 const int            numPmeOnlyRanks,
                                 const real           cellSizeLimit,
                                 const bool           request1DAnd1Pulse,
                                 const gmx_mtop_t&    mtop,
                                 const matrix         box,
                                 const gmx_ddbox_t&   ddbox,
                                 const t_inputrec&    ir,
                                 const DDSystemInfo&  systemInfo)
{
    double pbcdxr;

    const int numPPRanks = numRanksRequested - numPmeOnlyRanks;

    GMX_LOG(mdlog.info)
            .appendTextFormatted(
                    "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
                    numPPRanks, cellSizeLimit);
    if (inhomogeneous_z(ir))
    {
        GMX_LOG(mdlog.info)
                .appendTextFormatted(
                        "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, "
                        "will not decompose in z.",
                        eewg_names[ir.ewald_geometry]);
    }

    // For cost estimates, we need the number of ranks doing PME work,
    // which is the number of PP ranks when not using separate
    // PME-only ranks.
    const int numRanksDoingPmeWork =
            (EEL_PME(ir.coulombtype) ? ((numPmeOnlyRanks > 0) ? numPmeOnlyRanks : numPPRanks) : 0);

    if (systemInfo.haveInterDomainBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, nullptr);
        pbcdxr /= static_cast<double>(mtop.natoms);
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }

    if (cellSizeLimit > 0)
    {
        std::string maximumCells = "The maximum allowed number of cells is:";
        for (int d = 0; d < DIM; d++)
        {
            int nmax = static_cast<int>(ddbox.box_size[d] * ddbox.skew_fac[d] / cellSizeLimit);
            if (d >= ddbox.npbcdim && nmax < 2)
            {
                nmax = 2;
            }
            if (d == ZZ && inhomogeneous_z(ir))
            {
                nmax = 1;
            }
            maximumCells += gmx::formatString(" %c %d", 'X' + d, nmax);
        }
        GMX_LOG(mdlog.info).appendText(maximumCells);
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose numPPRanks in factors */
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(numPPRanks, &div, &mdiv);

    gmx::IVec itry       = { 1, 1, 1 };
    gmx::IVec numDomains = { 0, 0, 0 };
    assign_factors(cellSizeLimit, request1DAnd1Pulse, systemInfo.cutoff, box, ddbox, mtop.natoms, ir,
                   pbcdxr, numRanksDoingPmeWork, div.size(), div.data(), mdiv.data(), &itry, &numDomains);

    return numDomains;
}
getDDGridSetupCellSizeLimit(const gmx::MDLogger
& mdlog
,
726 const bool request1DAnd1Pulse
,
727 const bool bDynLoadBal
,
728 const real dlb_scale
,
729 const t_inputrec
& ir
,
730 const real systemInfoCellSizeLimit
)
732 real cellSizeLimit
= systemInfoCellSizeLimit
;
733 if (request1DAnd1Pulse
)
735 cellSizeLimit
= std::max(cellSizeLimit
, ir
.rlist
);
738 /* Add a margin for DLB and/or pressure scaling */
741 if (dlb_scale
>= 1.0)
743 gmx_fatal(FARGS
, "The value for option -dds should be smaller than 1");
746 .appendTextFormatted(
747 "Scaling the initial minimum size with 1/%g (option -dds) = %g", dlb_scale
,
749 cellSizeLimit
/= dlb_scale
;
751 else if (ir
.epc
!= epcNO
)
754 .appendTextFormatted(
755 "To account for pressure scaling, scaling the initial minimum size with %g",
756 DD_GRID_MARGIN_PRES_SCALE
);
757 cellSizeLimit
*= DD_GRID_MARGIN_PRES_SCALE
;
760 return cellSizeLimit
;
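
/* Example: with dynamic load balancing and -dds 0.8, a 1.0 nm minimum cell
 * size becomes 1.0 / 0.8 = 1.25 nm; with pressure coupling instead it
 * becomes 1.0 * DD_GRID_MARGIN_PRES_SCALE = 1.05 nm.
 */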

void checkForValidRankCountRequests(const int numRanksRequested, const bool usingPme, const int numPmeRanksRequested)
{
    int numPPRanksRequested = numRanksRequested;
    if (usingPme && numPmeRanksRequested > 0)
    {
        numPPRanksRequested -= numPmeRanksRequested;
        if (numPmeRanksRequested > numPPRanksRequested)
        {
            gmx_fatal(FARGS,
                      "Cannot have %d separate PME ranks with only %d PP ranks, choose fewer or no "
                      "separate PME ranks",
                      numPmeRanksRequested, numPPRanksRequested);
        }
    }

    // Once the rank count is large enough, it becomes worth
    // suggesting improvements to the user.
    const int minPPRankCountToCheckForLargePrimeFactors = 13;
    if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
    {
        const int largestDivisor = largest_divisor(numPPRanksRequested);
        /* Check if the largest prime factor is more than numPPRanks ^ (2/3) */
        if (largestDivisor * largestDivisor * largestDivisor > numPPRanksRequested * numPPRanksRequested)
        {
            gmx_fatal(FARGS,
                      "The number of ranks selected for particle-particle work (%d) "
                      "contains a large prime factor %d. In most cases this will lead to "
                      "bad performance. Choose a number with smaller prime factors or "
                      "set the decomposition (option -dd) manually.",
                      numPPRanksRequested, largestDivisor);
        }
    }
}
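
/* Example: 26 PP ranks have largest prime factor 13, and since
 * 13^3 = 2197 > 26^2 = 676 the check above aborts with a suggestion to
 * pick a rank count with smaller prime factors.
 */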

/*! \brief Return the number of PME-only ranks used by the simulation
 *
 * If the user did not choose a number, then decide for them. */
static int getNumPmeOnlyRanksToUse(const gmx::MDLogger& mdlog,
                                   const DomdecOptions& options,
                                   const gmx_mtop_t&    mtop,
                                   const t_inputrec&    ir,
                                   const matrix         box,
                                   const int            numRanksRequested)
{
    int         numPmeOnlyRanks;
    const char* extraMessage = "";

    if (options.numCells[XX] > 0)
    {
        if (options.numPmeRanks >= 0)
        {
            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
            numPmeOnlyRanks = options.numPmeRanks;
        }
        else
        {
            // When the DD grid is set explicitly and -npme is set to auto,
            // don't use PME ranks. We check later if the DD grid is
            // compatible with the total number of ranks.
            numPmeOnlyRanks = 0;
        }
    }
    else
    {
        if (!EEL_PME(ir.coulombtype))
        {
            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
            numPmeOnlyRanks = 0;
        }
        else
        {
            if (options.numPmeRanks >= 0)
            {
                numPmeOnlyRanks = options.numPmeRanks;
            }
            else
            {
                // Controls the automated choice of when to use separate PME-only ranks.
                const int minRankCountToDefaultToSeparatePmeRanks = 19;

                if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks)
                {
                    numPmeOnlyRanks = 0;
                    extraMessage =
                            ", as there are too few total\n"
                            " ranks for efficient splitting";
                }
                else
                {
                    numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
                    extraMessage    = ", as guessed by mdrun";
                }
            }
        }
    }

    GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
                       "Cannot have more PME ranks than total ranks");
    if (EEL_PME(ir.coulombtype))
    {
        GMX_LOG(mdlog.info).appendTextFormatted("Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage);
    }

    return numPmeOnlyRanks;
}

/*! \brief Sets the order of the DD dimensions, returns the number of DD dimensions */
static int set_dd_dim(const gmx::IVec& numDDCells, const DDSettings& ddSettings, ivec* dims)
{
    int ndim = 0;
    if (ddSettings.useDDOrderZYX)
    {
        /* Decomposition order z,y,x */
        for (int dim = DIM - 1; dim >= 0; dim--)
        {
            if (numDDCells[dim] > 1)
            {
                (*dims)[ndim++] = dim;
            }
        }
    }
    else
    {
        /* Decomposition order x,y,z */
        for (int dim = 0; dim < DIM; dim++)
        {
            if (numDDCells[dim] > 1)
            {
                (*dims)[ndim++] = dim;
            }
        }
    }

    if (ndim == 0)
    {
        /* Set dim[0] to avoid extra checks on ndim in several places */
        (*dims)[0] = XX;
    }

    return ndim;
}
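
/* Example: numDDCells = { 4, 2, 1 } gives ndim = 2 with dims = { XX, YY }
 * in the default x,y,z order, or dims = { YY, XX } with useDDOrderZYX.
 */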

DDGridSetup getDDGridSetup(const gmx::MDLogger&           mdlog,
                           const t_commrec*               cr,
                           const int                      numRanksRequested,
                           const DomdecOptions&           options,
                           const DDSettings&              ddSettings,
                           const DDSystemInfo&            systemInfo,
                           const real                     cellSizeLimit,
                           const gmx_mtop_t&              mtop,
                           const t_inputrec&              ir,
                           const matrix                   box,
                           gmx::ArrayRef<const gmx::RVec> xGlobal,
                           gmx_ddbox_t*                   ddbox)
{
    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested);

    if (ddSettings.request1DAnd1Pulse && (numRanksRequested - numPmeOnlyRanks == 1))
    {
        // With only one PP rank, there will not be a need for
        // GPU-based halo exchange that wants to request that any DD
        // has only 1 dimension and 1 pulse.
        return DDGridSetup{};
    }

    gmx::IVec numDomains;
    if (options.numCells[XX] > 0)
    {
        numDomains                      = gmx::IVec(options.numCells);
        const ivec numDomainsLegacyIvec = { numDomains[XX], numDomains[YY], numDomains[ZZ] };
        set_ddbox_cr(*cr, &numDomainsLegacyIvec, ir, box, xGlobal, ddbox);
    }
    else
    {
        set_ddbox_cr(*cr, nullptr, ir, box, xGlobal, ddbox);

        if (MASTER(cr))
        {
            numDomains = optimizeDDCells(mdlog, numRanksRequested, numPmeOnlyRanks, cellSizeLimit,
                                         ddSettings.request1DAnd1Pulse, mtop, box, *ddbox, ir, systemInfo);
        }
    }

    /* Communicate the information set by the master to all ranks */
    gmx_bcast(sizeof(numDomains), numDomains, cr);
    if (EEL_PME(ir.coulombtype))
    {
        gmx_bcast(sizeof(numPmeOnlyRanks), &numPmeOnlyRanks, cr);
    }

    DDGridSetup ddGridSetup;
    ddGridSetup.numPmeOnlyRanks = numPmeOnlyRanks;
    ddGridSetup.numDomains[XX]  = numDomains[XX];
    ddGridSetup.numDomains[YY]  = numDomains[YY];
    ddGridSetup.numDomains[ZZ]  = numDomains[ZZ];
    ddGridSetup.numDDDimensions = set_dd_dim(numDomains, ddSettings, &ddGridSetup.ddDimensions);