/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines functions used by the domdec module
 * in its initial setup phase.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */
#include "domdec_setup.h"

#include <cassert>
#include <cmath>
#include <cstdio>

#include <algorithm>
#include <string>
#include <vector>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/options.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/perf_est.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"

#include "domdec_internal.h"
// TODO remove this when moving domdec into gmx namespace
using gmx::DomdecOptions;
/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05
/*! \brief Factorize \p n.
 *
 * \param[in]  n     Value to factorize
 * \param[out] fac   Vector of factors (to be allocated in this function)
 * \param[out] mfac  Vector with the number of times each factor repeats in the factorization (to be allocated in this function)
 */
static void factorize(int               n,
                      std::vector<int> *fac,
                      std::vector<int> *mfac)
{
    if (n <= 0)
    {
        gmx_fatal(FARGS, "Can only factorize positive integers.");
    }

    /* Decompose n in factors */
    fac->clear();
    mfac->clear();
    int d = 2;
    while (n > 1)
    {
        while (n % d == 0)
        {
            if (fac->empty() || fac->back() != d)
            {
                fac->push_back(d);
                mfac->push_back(1);
            }
            else
            {
                mfac->back()++;
            }
            n /= d;
        }
        d++;
    }
}
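/* For reference, a small worked example of the factorization above:
 * factorize(12, &fac, &mfac) leaves fac = {2, 3} and mfac = {2, 1},
 * since 12 = 2^2 * 3.
 */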
/*! \brief Find largest divisor of \p n smaller than \p n */
static int largest_divisor(int n)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(n, &div, &mdiv);

    return div.back();
}
/*! \brief Compute largest common divisor of \p n1 and \p n2 */
static int lcd(int n1, int n2)
{
    int i, d;

    d = 1;
    for (i = 2; (i <= n1 && i <= n2); i++)
    {
        if (n1 % i == 0 && n2 % i == 0)
        {
            d = i;
        }
    }

    return d;
}
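/* For example, lcd(12, 18) returns 6 and lcd(4, 6) returns 2: the loop keeps
 * the last trial value that divides both arguments.
 */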
/*! \brief Returns TRUE when there are enough PME ranks for the ratio */
static gmx_bool fits_pme_ratio(int nrank_tot, int nrank_pme, float ratio)
{
    return (static_cast<double>(nrank_pme)/static_cast<double>(nrank_tot) > 0.95*ratio);
}
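/* Numerical example: with nrank_tot = 16 and an estimated PME load ratio of
 * 0.25, nrank_pme = 4 fits (4/16 = 0.25 > 0.95*0.25 = 0.2375), while
 * nrank_pme = 3 does not (3/16 = 0.1875).
 */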
/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(ntot - npme, &div, &mdiv);

    int npp_root3  = gmx::roundToInt(std::cbrt(ntot - npme));
    int npme_root2 = gmx::roundToInt(std::sqrt(static_cast<double>(npme)));

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (div.back() > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the number of PP and PME ranks have a reasonable sized
     * denominator in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}
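/* Worked example: ntot = 24, npme = 4 leaves 20 PP ranks (20 = 2^2 * 5).
 * The largest factor 5 is not above 3 + round(cbrt(20)) = 6, and
 * lcd(20, 4)*2 = 8 is not below round(sqrt(4)) = 2, so the division is
 * accepted provided the PME load ratio also fits.
 */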
/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(const gmx::MDLogger &mdlog,
                      const gmx_mtop_t    &mtop,
                      const t_inputrec    &ir,
                      const matrix         box,
                      int                  nrank_tot)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    GMX_LOG(mdlog.info).appendTextFormatted(
            "Guess for relative PME load: %.2f", ratio);

    /* We assume the optimal rank ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nrank_tot, nrank_tot/2, ratio))
    {
        /* We would need more than nrank_tot/2 PME only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all ranks do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nrank_tot up to nrank_tot/3.
     * We start with a minimum PME node fraction of 1/16
     * and avoid ratios which lead to large prime factors in nnodes-npme.
     */
    npme = (nrank_tot + 15)/16;
    while (npme <= nrank_tot/3)
    {
        if (nrank_tot % npme == 0)
        {
            /* Note that fits_perf might change the PME grid,
             * in the current implementation it does not.
             */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
        npme++;
    }

    if (npme > nrank_tot/3)
    {
        /* Try any possible number for npme */
        npme = 1;
        while (npme <= nrank_tot/2)
        {
            /* Note that fits_perf may change the PME grid */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
            npme++;
        }
    }

    if (npme > nrank_tot/2)
    {
        gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
                  ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir.nkx, ir.nky);
    }
    else
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Will use %d particle-particle and %d PME only ranks\n"
                "This is a guess, check the performance at the end of the log file",
                nrank_tot - npme, npme);
    }

    return npme;
}
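/* Illustration of the search order above: with nrank_tot = 32 the first loop
 * starts at npme = (32 + 15)/16 = 2 and only tries divisors of 32 up to
 * 32/3 = 10 (i.e. 2, 4 and 8); only if none of those passes
 * fits_pp_pme_perf() does the second loop consider every value up to 16.
 */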
/*! \brief Return \p n divided by \p f rounded up to the next integer. */
static int div_up(int n, int f)
{
    return (n + f - 1)/f;
}
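/* E.g. div_up(7, 2) = (7 + 2 - 1)/2 = 4, where plain integer division would give 3. */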
real comm_box_frac(const gmx::IVec &dd_nc, real cutoff, const gmx_ddbox_t &ddbox)
{
    int  i, j, k;
    rvec nw;
    real comm_vol;

    for (i = 0; i < DIM; i++)
    {
        real bt = ddbox.box_size[i]*ddbox.skew_fac[i];
        nw[i] = dd_nc[i]*cutoff/bt;
    }

    comm_vol = 0;
    for (i = 0; i < DIM; i++)
    {
        if (dd_nc[i] > 1)
        {
            comm_vol += nw[i];
            for (j = i+1; j < DIM; j++)
            {
                if (dd_nc[j] > 1)
                {
                    comm_vol += nw[i]*nw[j]*M_PI/4;
                    for (k = j+1; k < DIM; k++)
                    {
                        if (dd_nc[k] > 1)
                        {
                            comm_vol += nw[i]*nw[j]*nw[k]*M_PI/6;
                        }
                    }
                }
            }
        }
    }

    return comm_vol;
}
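/* The single, pair and triple products of nw above roughly correspond to the
 * face, edge (quarter-cylinder) and corner (eighth-sphere) regions of the halo
 * that has to be communicated, expressed as a fraction of the home-cell volume.
 */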
/*! \brief Return whether the DD grid is inhomogeneous in the z direction */
static gmx_bool inhomogeneous_z(const t_inputrec &ir)
{
    return ((EEL_PME(ir.coulombtype) || ir.coulombtype == eelEWALD) &&
            ir.ePBC == epbcXYZ && ir.ewald_geometry == eewg3DC);
}
/*! \brief Estimate cost of PME FFT communication
 *
 * This only takes the communication into account and not imbalance
 * in the calculation. But the imbalance in communication and calculation
 * are similar and therefore these formulas also prefer load balance
 * in the FFT and pme_solve calculation.
 */
static float comm_pme_cost_vol(int npme, int a, int b, int c)
{
    /* We use a float here, since an integer might overflow */
    float comm_vol;

    comm_vol  = npme - 1;
    comm_vol *= npme;
    comm_vol *= div_up(a, npme);
    comm_vol *= div_up(b, npme);
    comm_vol *= c;

    return comm_vol;
}
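/* Because the per-rank slab sizes are rounded up with div_up(), grid dimensions
 * that do not divide evenly over npme inflate the estimated volume, so the cost
 * estimate naturally prefers PME rank counts that divide the FFT grid.
 */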
/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real limit, real cutoff,
                           const matrix box, const gmx_ddbox_t &ddbox,
                           int natoms, const t_inputrec &ir,
                           float pbcdxr,
                           int npme_tot, const gmx::IVec &nc)
{
    gmx::IVec npme = {1, 1, 1};
    int       i, j, nk, overlap;
    rvec      bt;
    float     comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
    /* This is the cost of a pbc_dx call relative to the cost
     * of communicating the coordinate and force of an atom.
     * This will be machine dependent.
     * These factors are for x86 with SMP or Infiniband.
     */
    float     pbcdx_rect_fac = 0.1;
    float     pbcdx_tric_fac = 0.2;
    /* Check the DD algorithm restrictions */
    if ((ir.ePBC == epbcXY && ir.nwall < 2 && nc[ZZ] > 1) ||
        (ir.ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox.npbcdim <= DIM);

    /* Check if the triclinic requirements are met */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < ddbox.npbcdim; j++)
        {
            if (box[j][i] != 0 || ir.deform[j][i] != 0 ||
                (ir.epc != epcNO && ir.compress[j][i] != 0))
            {
                if (nc[j] > 1 && nc[i] == 1)
                {
                    return -1;
                }
            }
        }
    }

    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox.box_size[i]*ddbox.skew_fac[i];

        /* Without PBC and with 2 cells, there are no lower limits on the cell size */
        if (!(i >= ddbox.npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
        {
            return -1;
        }

        /* With PBC, check if the cut-off fits in nc[i]-1 cells */
        if (i < ddbox.npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
        {
            return -1;
        }
    }
    if (npme_tot > 1)
    {
        /* The following choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[XX] == 1 && nc[YY] > 1)
        {
            npme[XX] = 1;
            npme[YY] = npme_tot;
        }
        else if (nc[YY] == 1)
        {
            npme[XX] = npme_tot;
            npme[YY] = 1;
        }
        else
        {
            /* Will we use 1D or 2D PME decomposition? */
            npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot;
            npme[YY] = npme_tot/npme[XX];
        }
    }

    if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
    {
        /* Check the PME grid restrictions.
         * Currently these can only be invalid here with too few grid lines
         * along the x dimension per rank doing PME.
         */
        int npme_x = (npme_tot > 1 ? npme[XX] : nc[XX]);

        /* Currently we don't have the OpenMP thread count available here.
         * But with threads we have only tighter restrictions and it's
         * probably better anyhow to avoid settings where we need to reduce
         * grid lines over multiple ranks, as the thread check will do.
         */
        bool useThreads     = true;
        bool errorsAreFatal = false;
        if (!gmx_pme_check_restrictions(ir.pme_order, ir.nkx, ir.nky, ir.nkz,
                                        npme_x, useThreads, errorsAreFatal))
        {
            return -1;
        }
    }
    /* When two dimensions are (nearly) equal, use more cells
     * for the smallest index, so the decomposition does not
     * depend sensitively on the rounding of the box elements.
     */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < DIM; j++)
        {
            /* Check if the box size is nearly identical,
             * in that case we prefer nx > ny and ny > nz.
             */
            if (std::fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i])
            {
                /* The XX/YY check is a bit compact. If nc[YY]==npme[YY]
                 * this means the swapped nc has nc[XX]==npme[XX],
                 * and we can also swap X and Y for PME.
                 */
                /* Check if dimension i and j are equivalent for PME.
                 * For x/y: if nc[YY]!=npme[YY], we can not swap x/y
                 * For y/z: we can not have PME decomposition in z
                 */
                if (npme_tot <= 1 ||
                    !((i == XX && j == YY && nc[YY] != npme[YY]) ||
                      (i == YY && j == ZZ && npme[YY] > 1)))
                {
                    return -1;
                }
            }
        }
    }

    /* This function determines only half of the communication cost.
     * All PP, PME and PP-PME communication is symmetric
     * and the "back"-communication cost is identical to the forward cost.
     */

    comm_vol = comm_box_frac(nc, cutoff, ddbox);
    comm_pme = 0;
    for (i = 0; i < 2; i++)
    {
        /* Determine the largest volume for PME x/f redistribution */
        if (nc[i] % npme[i] != 0)
        {
            if (nc[i] > npme[i])
            {
                comm_vol_xf = (npme[i] == 2 ? 1.0/3.0 : 0.5);
            }
            else
            {
                comm_vol_xf = 1.0 - lcd(nc[i], npme[i])/static_cast<double>(npme[i]);
            }
            comm_pme += 3*natoms*comm_vol_xf;
        }

        /* Grid overlap communication */
        if (npme[i] > 1)
        {
            nk      = (i == 0 ? ir.nkx : ir.nky);
            overlap = (nk % npme[i] == 0 ? ir.pme_order-1 : ir.pme_order);
            float temp = npme[i];
            temp      *= overlap;
            temp      *= ir.nkx;
            temp      *= ir.nky;
            temp      *= ir.nkz;
            temp      /= nk;
            comm_pme  += temp;
            /* Old line comm_pme += npme[i]*overlap*ir.nkx*ir.nky*ir.nkz/nk; */
        }
    }

    comm_pme += comm_pme_cost_vol(npme[YY], ir.nky, ir.nkz, ir.nkx);
    comm_pme += comm_pme_cost_vol(npme[XX], ir.nkx, ir.nky, ir.nkz);

    /* Add cost of pbc_dx for bondeds */
    cost_pbcdx = 0;
    if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir.ePBC != epbcXY))
    {
        if ((ddbox.tric_dir[XX] && nc[XX] == 1) ||
            (ddbox.tric_dir[YY] && nc[YY] == 1))
        {
            cost_pbcdx = pbcdxr*pbcdx_tric_fac;
        }
        else
        {
            cost_pbcdx = pbcdxr*pbcdx_rect_fac;
        }
    }

    if (debug)
    {
        fprintf(debug,
                "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
                nc[XX], nc[YY], nc[ZZ], npme[XX], npme[YY],
                comm_vol, cost_pbcdx, comm_pme/(3*natoms),
                comm_vol + cost_pbcdx + comm_pme/(3*natoms));
    }

    return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}
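/* The factor 3*natoms converts the dimensionless volume fractions into a count
 * of real values communicated (x, y and z for every atom), which puts the PP
 * halo cost on the same scale as comm_pme, which was accumulated the same way.
 */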
/*! \brief Assign penalty factors to possible domain decompositions,
 * based on the estimated communication costs. */
static void assign_factors(const real limit, const bool request1D,
                           const real cutoff,
                           const matrix box, const gmx_ddbox_t &ddbox,
                           int natoms, const t_inputrec &ir,
                           float pbcdxr, int npme,
                           int ndiv, const int *div, const int *mdiv,
                           gmx::IVec *irTryPtr,
                           gmx::IVec *opt)
{
    int        x, y, i;
    float      ce;
    gmx::IVec &ir_try = *irTryPtr;

    if (ndiv == 0)
    {
        const int  maxDimensionSize             = std::max(ir_try[XX], std::max(ir_try[YY], ir_try[ZZ]));
        const int  productOfDimensionSizes      = ir_try[XX]*ir_try[YY]*ir_try[ZZ];
        const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
        if (request1D && !decompositionHasOneDimension)
        {
            /* We requested 1D DD, but got multiple dimensions */
            return;
        }

        ce = comm_cost_est(limit, cutoff, box, ddbox,
                           natoms, ir, pbcdxr, npme, ir_try);
        if (ce >= 0 && ((*opt)[XX] == 0 ||
                        ce < comm_cost_est(limit, cutoff, box, ddbox,
                                           natoms, ir, pbcdxr, npme, *opt)))
        {
            *opt = ir_try;
        }

        return;
    }

    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0]-x; y >= 0; y--)
        {
            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            /* recurse */
            assign_factors(limit, request1D,
                           cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
                           ndiv-1, div+1, mdiv+1, irTryPtr, opt);

            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}
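/* The recursion above enumerates every way of distributing the prime factors of
 * the PP rank count over x, y and z. For 12 PP ranks (div = {2, 3}, mdiv = {2, 1})
 * it visits grids such as 12x1x1, 6x2x1, 4x3x1, 2x2x3 and 1x1x12, and keeps the
 * candidate with the lowest comm_cost_est() in *opt.
 */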
/*! \brief Determine the optimal distribution of DD cells for the
 * simulation system and number of MPI ranks
 *
 * \returns The optimal grid cell choice. The latter will contain all
 *          zeros if no valid cell choice exists. */
static gmx::IVec
optimizeDDCells(const gmx::MDLogger &mdlog,
                const int            numRanksRequested,
                const int            numPmeOnlyRanks,
                const real           cellSizeLimit,
                const bool           request1DAnd1Pulse,
                const gmx_mtop_t    &mtop,
                const matrix         box,
                const gmx_ddbox_t   &ddbox,
                const t_inputrec    &ir,
                const DDSystemInfo  &systemInfo)
{
    double pbcdxr;

    const int numPPRanks = numRanksRequested - numPmeOnlyRanks;

    GMX_LOG(mdlog.info).appendTextFormatted(
            "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
            numPPRanks, cellSizeLimit);
    if (inhomogeneous_z(ir))
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
                eewg_names[ir.ewald_geometry]);
    }

    // For cost estimates, we need the number of ranks doing PME work,
    // which is the number of PP ranks when not using separate
    // PME-only ranks.
    const int numRanksDoingPmeWork = (EEL_PME(ir.coulombtype) ?
                                      ((numPmeOnlyRanks > 0) ? numPmeOnlyRanks : numPPRanks) :
                                      0);

    if (systemInfo.haveInterDomainBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, nullptr);
        pbcdxr /= static_cast<double>(mtop.natoms);
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }

    if (cellSizeLimit > 0)
    {
        std::string maximumCells = "The maximum allowed number of cells is:";
        for (int d = 0; d < DIM; d++)
        {
            int nmax = static_cast<int>(ddbox.box_size[d]*ddbox.skew_fac[d]/cellSizeLimit);
            if (d >= ddbox.npbcdim && nmax < 2)
            {
                nmax = 2;
            }
            if (d == ZZ && inhomogeneous_z(ir))
            {
                nmax = 1;
            }
            maximumCells += gmx::formatString(" %c %d", 'X' + d, nmax);
        }
        GMX_LOG(mdlog.info).appendText(maximumCells);
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose numPPRanks in factors */
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(numPPRanks, &div, &mdiv);

    gmx::IVec itry       = { 1, 1, 1 };
    gmx::IVec numDomains = { 0, 0, 0 };
    assign_factors(cellSizeLimit, request1DAnd1Pulse,
                   systemInfo.cutoff, box, ddbox, mtop.natoms, ir, pbcdxr,
                   numRanksDoingPmeWork, div.size(), div.data(), mdiv.data(), &itry, &numDomains);

    return numDomains;
}
real
getDDGridSetupCellSizeLimit(const gmx::MDLogger &mdlog,
                            const bool           request1DAnd1Pulse,
                            const bool           bDynLoadBal,
                            const real           dlb_scale,
                            const t_inputrec    &ir,
                            const real           systemInfoCellSizeLimit)
{
    real cellSizeLimit = systemInfoCellSizeLimit;
    if (request1DAnd1Pulse)
    {
        cellSizeLimit = std::max(cellSizeLimit, ir.rlist);
    }

    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Scaling the initial minimum size with 1/%g (option -dds) = %g",
                dlb_scale, 1/dlb_scale);
        cellSizeLimit /= dlb_scale;
    }
    else if (ir.epc != epcNO)
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "To account for pressure scaling, scaling the initial minimum size with %g",
                DD_GRID_MARGIN_PRES_SCALE);
        cellSizeLimit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    return cellSizeLimit;
}
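/* Numerically: with dynamic load balancing and -dds 0.8 the limit grows by a
 * factor 1/0.8 = 1.25; without DLB but with pressure coupling it grows by
 * DD_GRID_MARGIN_PRES_SCALE = 1.05.
 */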
void
checkForValidRankCountRequests(const int  numRanksRequested,
                               const bool usingPme,
                               const int  numPmeRanksRequested)
{
    int numPPRanksRequested = numRanksRequested;
    if (usingPme && numPmeRanksRequested > 0)
    {
        numPPRanksRequested -= numPmeRanksRequested;
        if (numPmeRanksRequested > numPPRanksRequested)
        {
            gmx_fatal(FARGS,
                      "Cannot have %d separate PME ranks with only %d PP ranks, choose fewer or no separate PME ranks",
                      numPmeRanksRequested, numPPRanksRequested);
        }
    }

    // Once the rank count is large enough, it becomes worth
    // suggesting improvements to the user.
    const int minPPRankCountToCheckForLargePrimeFactors = 13;
    if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
    {
        const int largestDivisor = largest_divisor(numPPRanksRequested);
        /* Check if the largest divisor is more than numPPRanks ^ (2/3) */
        if (largestDivisor*largestDivisor*largestDivisor >
            numPPRanksRequested*numPPRanksRequested)
        {
            gmx_fatal(FARGS, "The number of ranks selected for particle-particle work (%d) "
                      "contains a large prime factor %d. In most cases this will lead to "
                      "bad performance. Choose a number with smaller prime factors or "
                      "set the decomposition (option -dd) manually.",
                      numPPRanksRequested, largestDivisor);
        }
    }
}
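/* Example of the prime-factor guard above: 26 PP ranks has largest_divisor(26)
 * = 13, and 13^3 = 2197 > 26^2 = 676, so mdrun refuses to run; 25 PP ranks
 * (largest divisor 5, 125 < 625) is accepted.
 */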
/*! \brief Return the number of PME-only ranks used by the simulation
 *
 * If the user did not choose a number, then decide for them. */
static int
getNumPmeOnlyRanksToUse(const gmx::MDLogger &mdlog,
                        const DomdecOptions &options,
                        const gmx_mtop_t    &mtop,
                        const t_inputrec    &ir,
                        const matrix         box,
                        const int            numRanksRequested)
{
    int         numPmeOnlyRanks;
    const char *extraMessage = "";

    if (options.numCells[XX] > 0)
    {
        if (options.numPmeRanks >= 0)
        {
            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
            numPmeOnlyRanks = options.numPmeRanks;
        }
        else
        {
            // When the DD grid is set explicitly and -npme is set to auto,
            // don't use PME ranks. We check later if the DD grid is
            // compatible with the total number of ranks.
            numPmeOnlyRanks = 0;
        }
    }
    else
    {
        if (!EEL_PME(ir.coulombtype))
        {
            // TODO mdrun should give a fatal error with a non-PME input file and -npme > 0
            numPmeOnlyRanks = 0;
        }
        else
        {
            if (options.numPmeRanks >= 0)
            {
                numPmeOnlyRanks = options.numPmeRanks;
            }
            else
            {
                // Controls the automated choice of when to use separate PME-only ranks.
                const int minRankCountToDefaultToSeparatePmeRanks = 19;

                if (numRanksRequested < minRankCountToDefaultToSeparatePmeRanks)
                {
                    numPmeOnlyRanks = 0;
                    extraMessage    = ", as there are too few total\n"
                        " ranks for efficient splitting";
                }
                else
                {
                    numPmeOnlyRanks = guess_npme(mdlog, mtop, ir, box, numRanksRequested);
                    extraMessage    = ", as guessed by mdrun";
                }
            }
        }
    }

    GMX_RELEASE_ASSERT(numPmeOnlyRanks <= numRanksRequested,
                       "Cannot have more PME ranks than total ranks");
    if (EEL_PME(ir.coulombtype))
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Using %d separate PME ranks%s", numPmeOnlyRanks, extraMessage);
    }

    return numPmeOnlyRanks;
}
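/* With the defaults above, a PME run without -npme and without -dd gets no
 * separate PME ranks on 16 total ranks (below the threshold of 19), while on
 * 32 ranks the choice falls through to guess_npme().
 */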
/*! \brief Sets the order of the DD dimensions, returns the number of DD dimensions */
static int set_dd_dim(const gmx::IVec  &numDDCells,
                      const DDSettings &ddSettings,
                      ivec             *dims)
{
    int ndim = 0;
    if (ddSettings.useDDOrderZYX)
    {
        /* Decomposition order z,y,x */
        for (int dim = DIM - 1; dim >= 0; dim--)
        {
            if (numDDCells[dim] > 1)
            {
                (*dims)[ndim++] = dim;
            }
        }
    }
    else
    {
        /* Decomposition order x,y,z */
        for (int dim = 0; dim < DIM; dim++)
        {
            if (numDDCells[dim] > 1)
            {
                (*dims)[ndim++] = dim;
            }
        }
    }

    if (ndim == 0)
    {
        /* Set dim[0] to avoid extra checks on ndim in several places */
        (*dims)[0] = XX;
    }

    return ndim;
}
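/* E.g. numDDCells = {4, 2, 1} gives dims = {XX, YY} and a return value of 2 with
 * the default x,y,z order, or dims = {YY, XX} when useDDOrderZYX is set.
 */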
DDGridSetup
getDDGridSetup(const gmx::MDLogger           &mdlog,
               const t_commrec               *cr,
               const int                      numRanksRequested,
               const DomdecOptions           &options,
               const DDSettings              &ddSettings,
               const DDSystemInfo            &systemInfo,
               const real                     cellSizeLimit,
               const gmx_mtop_t              &mtop,
               const t_inputrec              &ir,
               const matrix                   box,
               gmx::ArrayRef<const gmx::RVec> xGlobal,
               gmx_ddbox_t                   *ddbox)
{
    int numPmeOnlyRanks = getNumPmeOnlyRanksToUse(mdlog, options, mtop, ir, box, numRanksRequested);

    if (ddSettings.request1DAnd1Pulse &&
        (numRanksRequested - numPmeOnlyRanks == 1))
    {
        // With only one PP rank, there will not be a need for
        // GPU-based halo exchange that wants to request that any DD
        // has only 1 dimension and 1 pulse.
        return DDGridSetup{};
    }

    gmx::IVec numDomains;
    if (options.numCells[XX] > 0)
    {
        numDomains                      = gmx::IVec(options.numCells);
        const ivec numDomainsLegacyIvec = { numDomains[XX], numDomains[YY], numDomains[ZZ] };
        set_ddbox_cr(*cr, &numDomainsLegacyIvec, ir, box, xGlobal, ddbox);
    }
    else
    {
        set_ddbox_cr(*cr, nullptr, ir, box, xGlobal, ddbox);

        if (MASTER(cr))
        {
            numDomains =
                optimizeDDCells(mdlog, numRanksRequested, numPmeOnlyRanks,
                                cellSizeLimit,
                                ddSettings.request1DAnd1Pulse,
                                mtop, box, *ddbox, ir,
                                systemInfo);
        }
    }

    /* Communicate the information set by the master to all ranks */
    gmx_bcast(sizeof(numDomains), numDomains, cr);
    if (EEL_PME(ir.coulombtype))
    {
        gmx_bcast(sizeof(numPmeOnlyRanks), &numPmeOnlyRanks, cr);
    }

    DDGridSetup ddGridSetup;
    ddGridSetup.numPmeOnlyRanks = numPmeOnlyRanks;
    ddGridSetup.numDomains[XX]  = numDomains[XX];
    ddGridSetup.numDomains[YY]  = numDomains[YY];
    ddGridSetup.numDomains[ZZ]  = numDomains[ZZ];
    ddGridSetup.numDDDimensions = set_dd_dim(numDomains, ddSettings, &ddGridSetup.ddDimensions);

    return ddGridSetup;
}