/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

/*! \internal \file
 *
 * \brief This file defines functions used by the domdec module
 * in its initial setup phase.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include <assert.h>
#include <stdio.h>

#include <cmath>

#include "gromacs/domdec/domdec.h"
#include "gromacs/legacyheaders/names.h"
#include "gromacs/legacyheaders/network.h"
#include "gromacs/legacyheaders/perf_est.h"
#include "gromacs/legacyheaders/typedefs.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/vec.h"
#include "gromacs/utility/smalloc.h"

/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05

/*! \brief Factorize \p n.
 *
 * \param[in]  n     Value to factorize
 * \param[out] fac   Pointer to array of factors (to be allocated in this function)
 * \param[out] mfac  Pointer to array of the number of times each factor repeats in the factorization (to be allocated in this function)
 * \return The number of unique factors
 */
static int factorize(int n, int **fac, int **mfac)
{
    int d, ndiv;

    if (n <= 0)
    {
        gmx_fatal(FARGS, "Can only factorize positive integers.");
    }

    /* Decompose n in factors */
    snew(*fac, n/2);
    snew(*mfac, n/2);
    d    = 2;
    ndiv = 0;
    while (n > 1)
    {
        while (n % d == 0)
        {
            /* Start a new entry the first time divisor d is seen */
            if (ndiv == 0 || (*fac)[ndiv-1] != d)
            {
                ndiv++;
                (*fac)[ndiv-1] = d;
            }
            (*mfac)[ndiv-1]++;
            n /= d;
        }
        d++;
    }

    return ndiv;
}

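/* Illustrative use of factorize(); the values are examples only, not part
 * of the original file. Factorizing 12 = 2^2 * 3 gives two unique factors.
 */
#if 0
    {
        int *fac, *mfac;
        int  nuniq = factorize(12, &fac, &mfac);
        /* nuniq == 2, fac = {2, 3}, mfac = {2, 1} */
        sfree(fac);
        sfree(mfac);
    }
#endif
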
/*! \brief Return the largest prime factor of \p n */
static int largest_divisor(int n)
{
    int ndiv, *div, *mdiv, ldiv;

    ndiv = factorize(n, &div, &mdiv);
    /* The factors are sorted in ascending order, so the last is the largest */
    ldiv = div[ndiv-1];
    sfree(div);
    sfree(mdiv);

    return ldiv;
}

/*! \brief Compute the largest common divisor of \p n1 and \p n2 */
static int lcd(int n1, int n2)
{
    int d, i;

    d = 1;
    for (i = 2; (i <= n1 && i <= n2); i++)
    {
        if (n1 % i == 0 && n2 % i == 0)
        {
            d = i;
        }
    }

    return d;
}

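/* Worked examples for the two helpers above (illustration only, not part
 * of the original file):
 */
#if 0
    assert(largest_divisor(20) == 5); /* 20 = 2^2 * 5, largest prime factor 5 */
    assert(lcd(24, 18) == 6);         /* common divisors 1, 2, 3, 6 */
#endif
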
/*! \brief Returns TRUE when there are enough PME ranks for the ratio */
static gmx_bool fits_pme_ratio(int nrank_tot, int nrank_pme, float ratio)
{
    return ((double)nrank_pme/(double)nrank_tot > 0.95*ratio);
}

/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    int ndiv, *div, *mdiv, ldiv;
    int npp_root3, npme_root2;

    ndiv = factorize(ntot - npme, &div, &mdiv);
    ldiv = div[ndiv-1];
    sfree(div);
    sfree(mdiv);

    npp_root3  = static_cast<int>(std::pow(ntot - npme, 1.0/3.0) + 0.5);
    npme_root2 = static_cast<int>(std::sqrt(static_cast<double>(npme)) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME ranks have a reasonably sized
     * divisor in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}

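/* Worked example for the three checks above (numbers are illustrative only).
 * For ntot = 64, npme = 16: the PP count is 48 = 2^4 * 3, so ldiv = 3 and
 * npp_root3 = (int)(cbrt(48) + 0.5) = 4, and 3 <= 3 + 4 passes the prime
 * factor check; lcd(48, 16) = 16 and npme_root2 = 4, so 16*2 >= 4 passes
 * the 2D PME check. Acceptance then depends only on the PME load ratio:
 */
#if 0
    assert(fits_pp_pme_perf(64, 16, 0.2f)); /* 16/64 = 0.25 > 0.95*0.2 */
#endif
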
/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(FILE *fplog, gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
                      int nrank_tot)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    if (fplog != NULL)
    {
        fprintf(fplog, "Guess for relative PME load: %.2f\n", ratio);
    }

    /* We assume the optimal rank ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nrank_tot, nrank_tot/2, ratio))
    {
        /* We would need more than nrank_tot/2 PME only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all ranks do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nrank_tot up to nrank_tot/3.
     * We start with a minimum PME node fraction of 1/16
     * and avoid ratios which lead to large prime factors in nnodes-npme.
     */
    npme = (nrank_tot + 15)/16;
    while (npme <= nrank_tot/3)
    {
        if (nrank_tot % npme == 0)
        {
            /* Note that fits_pp_pme_perf() might change the PME grid;
             * in the current implementation it does not.
             */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
        npme++;
    }
    if (npme > nrank_tot/3)
    {
        /* Try any possible number for npme */
        npme = 1;
        while (npme <= nrank_tot/2)
        {
            /* Note that fits_pp_pme_perf() may change the PME grid */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
            npme++;
        }
    }
    if (npme > nrank_tot/2)
    {
        gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks, i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d), that performs reasonably (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
                  ratio, (int)(0.95*ratio*nrank_tot + 0.5), nrank_tot/2, ir->nkx, ir->nky);
        /* Keep the compiler happy */
        npme = 0;
    }
    else
    {
        if (fplog != NULL)
        {
            fprintf(fplog,
                    "Will use %d particle-particle and %d PME only ranks\n"
                    "This is a guess, check the performance at the end of the log file\n",
                    nrank_tot - npme, npme);
        }
        fprintf(stderr, "\n"
                "Will use %d particle-particle and %d PME only ranks\n"
                "This is a guess, check the performance at the end of the log file\n",
                nrank_tot - npme, npme);
    }

    return npme;
}

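/* Illustrative outcome of the search above (example numbers only): with
 * nrank_tot = 64 the first phase starts at npme = (64+15)/16 = 4 and only
 * tests the divisors of 64 up to 64/3 = 21, i.e. 4, 8 and 16; for a PME
 * load ratio around 0.25 the first value passing fits_pp_pme_perf() is 16.
 */
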
/*! \brief Return \p n divided by \p f rounded up to the next integer. */
static int div_up(int n, int f)
{
    return (n + f - 1)/f;
}

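/* E.g. div_up(10, 4) == 3 and div_up(12, 4) == 3: the PME cost estimates
 * below need the per-rank slab count rounded up, not truncated.
 */
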
real comm_box_frac(ivec dd_nc, real cutoff, gmx_ddbox_t *ddbox)
{
    int  i, j, k;
    rvec nw;
    real comm_vol;

    for (i = 0; i < DIM; i++)
    {
        real bt = ddbox->box_size[i]*ddbox->skew_fac[i];
        nw[i] = dd_nc[i]*cutoff/bt;
    }

    comm_vol = 0;
    for (i = 0; i < DIM; i++)
    {
        if (dd_nc[i] > 1)
        {
            comm_vol += nw[i];
            for (j = i+1; j < DIM; j++)
            {
                if (dd_nc[j] > 1)
                {
                    comm_vol += nw[i]*nw[j]*M_PI/4;
                    for (k = j+1; k < DIM; k++)
                    {
                        if (dd_nc[k] > 1)
                        {
                            comm_vol += nw[i]*nw[j]*nw[k]*M_PI/6;
                        }
                    }
                }
            }
        }
    }

    return comm_vol;
}

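/* Worked example for comm_box_frac() (illustrative numbers): for a cubic
 * box with side 8 nm, cutoff = 1 nm and dd_nc = {4, 4, 4}, each
 * nw[i] = 4*1/8 = 0.5, giving a communicated volume fraction of
 *   3*0.5 + 3*(0.5*0.5)*M_PI/4 + (0.5*0.5*0.5)*M_PI/6 ~= 2.15
 * per cell, counting the slab, edge and corner zones of the halo.
 */
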
/*! \brief Return whether the DD grid is inhomogeneous in the z direction */
static gmx_bool inhomogeneous_z(const t_inputrec *ir)
{
    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
}

/*! \brief Estimate cost of PME FFT communication
 *
 * This only takes the communication into account and not imbalance
 * in the calculation. But the imbalances in communication and calculation
 * are similar and therefore these formulas also prefer load balance
 * in the FFT and pme_solve calculation.
 */
static float comm_pme_cost_vol(int npme, int a, int b, int c)
{
    /* We use a float here, since an integer might overflow */
    float comm_vol;

    comm_vol  = npme - 1;
    comm_vol *= npme;
    comm_vol *= div_up(a, npme);
    comm_vol *= div_up(b, npme);
    comm_vol *= c;

    return comm_vol;
}

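/* Worked example (illustrative numbers): comm_pme_cost_vol(4, 32, 32, 32)
 * = (4-1) * 4 * div_up(32, 4) * div_up(32, 4) * 32 = 3*4*8*8*32 = 24576,
 * so the estimate grows with the rank count and with how unevenly the
 * grid dimensions divide over the PME ranks.
 */
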
/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           int natoms, t_inputrec *ir,
                           float pbcdxr,
                           int npme_tot, ivec nc)
{
    ivec  npme = {1, 1, 1};
    int   i, j, nk, overlap;
    rvec  bt;
    float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
    /* This is the cost of a pbc_dx call relative to the cost
     * of communicating the coordinate and force of an atom.
     * This will be machine dependent.
     * These factors are for x86 with SMP or Infiniband.
     */
    float pbcdx_rect_fac = 0.1;
    float pbcdx_tric_fac = 0.2;
    float temp;

    /* Check the DD algorithm restrictions */
    if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) ||
        (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox->npbcdim <= DIM);

    /* Check if the triclinic requirements are met */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < ddbox->npbcdim; j++)
        {
            if (box[j][i] != 0 || ir->deform[j][i] != 0 ||
                (ir->epc != epcNO && ir->compress[j][i] != 0))
            {
                if (nc[j] > 1 && nc[i] == 1)
                {
                    return -1;
                }
            }
        }
    }

    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];

        /* Without PBC and with 2 cells, there are no lower limits on the cell size */
        if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
        {
            return -1;
        }

        /* With PBC, check if the cut-off fits in nc[i]-1 cells */
        if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
        {
            return -1;
        }
    }

    if (npme_tot > 1)
    {
        /* The following choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[XX] == 1 && nc[YY] > 1)
        {
            npme[XX] = 1;
            npme[YY] = npme_tot;
        }
        else if (nc[YY] == 1)
        {
            npme[XX] = npme_tot;
            npme[YY] = 1;
        }
        else
        {
            /* Will we use 1D or 2D PME decomposition? */
            npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot;
            npme[YY] = npme_tot/npme[XX];
        }
    }

    /* When two dimensions are (nearly) equal, use more cells
     * for the smallest index, so the decomposition does not
     * depend sensitively on the rounding of the box elements.
     */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < DIM; j++)
        {
            /* Check if the box size is nearly identical,
             * in that case we prefer nx > ny and ny > nz.
             */
            if (fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i])
            {
                /* The XX/YY check is a bit compact. If nc[YY]==npme[YY]
                 * this means the swapped nc has nc[XX]==npme[XX],
                 * and we can also swap X and Y for PME.
                 */
                /* Check if dimension i and j are equivalent for PME.
                 * For x/y: if nc[YY]!=npme[YY], we cannot swap x/y.
                 * For y/z: we cannot have PME decomposition in z.
                 */
                if (npme_tot <= 1 ||
                    !((i == XX && j == YY && nc[YY] != npme[YY]) ||
                      (i == YY && j == ZZ && npme[YY] > 1)))
                {
                    return -1;
                }
            }
        }
    }

    /* This function determines only half of the communication cost.
     * All PP, PME and PP-PME communication is symmetric
     * and the "back"-communication cost is identical to the forward cost.
     */

    comm_vol = comm_box_frac(nc, cutoff, ddbox);

    comm_pme = 0;
    for (i = 0; i < 2; i++)
    {
        /* Determine the largest volume for PME x/f redistribution */
        if (nc[i] % npme[i] != 0)
        {
            if (nc[i] > npme[i])
            {
                comm_vol_xf = (npme[i] == 2 ? 1.0/3.0 : 0.5);
            }
            else
            {
                comm_vol_xf = 1.0 - lcd(nc[i], npme[i])/(double)npme[i];
            }
            comm_pme += 3*natoms*comm_vol_xf;
        }

        /* Grid overlap communication */
        if (npme[i] > 1)
        {
            nk      = (i == 0 ? ir->nkx : ir->nky);
            overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order);
            temp    = npme[i];
            temp   *= overlap;
            temp   *= ir->nkx;
            temp   *= ir->nky;
            temp   *= ir->nkz;
            temp   /= nk;
            comm_pme += temp;
/* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */
        }
    }

    comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx);
    comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz);

    /* Add cost of pbc_dx for bondeds */
    cost_pbcdx = 0;
    if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
    {
        if ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
            (ddbox->tric_dir[YY] && nc[YY] == 1))
        {
            cost_pbcdx = pbcdxr*pbcdx_tric_fac;
        }
        else
        {
            cost_pbcdx = pbcdxr*pbcdx_rect_fac;
        }
    }

    if (debug)
    {
        fprintf(debug,
                "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
                nc[XX], nc[YY], nc[ZZ], npme[XX], npme[YY],
                comm_vol, cost_pbcdx, comm_pme/(3*natoms),
                comm_vol + cost_pbcdx + comm_pme/(3*natoms));
    }

    return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}

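/* Summary of the estimate returned above, in units of communicating one
 * atom's coordinates and forces: 3*natoms*(comm_vol + cost_pbcdx) scales
 * the PP halo volume fraction and the relative pbc_dx cost to the whole
 * system, and comm_pme adds the PME x/f redistribution and grid volumes.
 * Lower is better; -1 marks a decomposition that violates a restriction.
 */
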
/*! \brief Recursively distribute the prime factors of the PP rank count over
 * the x, y and z grid dimensions, keeping in \p opt the decomposition with
 * the lowest estimated communication cost. */
static void assign_factors(gmx_domdec_t *dd,
                           real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           int natoms, t_inputrec *ir,
                           float pbcdxr, int npme,
                           int ndiv, int *div, int *mdiv, ivec ir_try, ivec opt)
{
    int   x, y, i;
    float ce;

    if (ndiv == 0)
    {
        ce = comm_cost_est(limit, cutoff, box, ddbox,
                           natoms, ir, pbcdxr, npme, ir_try);
        if (ce >= 0 && (opt[XX] == 0 ||
                        ce < comm_cost_est(limit, cutoff, box, ddbox,
                                           natoms, ir, pbcdxr, npme, opt)))
        {
            copy_ivec(ir_try, opt);
        }

        return;
    }

    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0]-x; y >= 0; y--)
        {
            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            /* Recurse over the remaining unique factors */
            assign_factors(dd, limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
                           ndiv-1, div+1, mdiv+1, ir_try, opt);

            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}

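/* A minimal standalone sketch (not GROMACS code) of the enumeration that
 * assign_factors() performs for one prime power d^m: every split of the
 * m powers over x, y and z is visited exactly once, and the recursion
 * over the remaining factors undoes each trial multiplication on return.
 */
#if 0
static void visit_splittings(int d, int m)
{
    int x, y;
    for (x = m; x >= 0; x--)
    {
        for (y = m - x; y >= 0; y--)
        {
            /* z receives the remaining m - x - y powers of d */
            printf("x: d^%d  y: d^%d  z: d^%d\n", x, y, m - x - y);
        }
    }
}
#endif
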
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot, int npme_only,
                            gmx_bool bDynLoadBal, real dlb_scale,
                            gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                            t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit, real cutoff,
                            gmx_bool bInterCGBondeds,
                            ivec nc)
{
    int    npp, npme, ndiv, *div, *mdiv, d, nmax;
    double pbcdxr;
    real   limit;
    ivec   itry;

    limit = cellsize_limit;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (bInterCGBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, NULL);
        pbcdxr /= (double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }
    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE);
        }
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    if (fplog)
    {
        fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit);

        if (inhomogeneous_z(ir))
        {
            fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]);
        }

        if (limit > 0)
        {
            fprintf(fplog, "The maximum allowed number of cells is:");
            for (d = 0; d < DIM; d++)
            {
                nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
                if (d >= ddbox->npbcdim && nmax < 2)
                {
                    nmax = 2;
                }
                if (d == ZZ && inhomogeneous_z(ir))
                {
                    nmax = 1;
                }
                fprintf(fplog, " %c %d", 'X' + d, nmax);
            }
            fprintf(fplog, "\n");
        }
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose npp in factors */
    ndiv = factorize(npp, &div, &mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
                   npme, ndiv, div, mdiv, itry, nc);

    sfree(div);
    sfree(mdiv);

    return limit;
}

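/* Example of the margins applied above (illustrative): with the default
 * mdrun -dds value of 0.8, limit /= 0.8 enlarges the minimum cell size
 * by 25% to leave room for dynamic load balancing; without DLB but with
 * pressure coupling the factor DD_GRID_MARGIN_PRES_SCALE = 1.05 is used.
 */
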
real dd_choose_grid(FILE *fplog,
                    t_commrec *cr, gmx_domdec_t *dd, t_inputrec *ir,
                    gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                    gmx_bool bDynLoadBal, real dlb_scale,
                    real cellsize_limit, real cutoff_dd,
                    gmx_bool bInterCGBondeds)
{
    gmx_int64_t nnodes_div, ldiv;
    real        limit;

    if (MASTER(cr))
    {
        nnodes_div = cr->nnodes;
        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes > 0)
            {
                if (cr->npmenodes >= cr->nnodes)
                {
                    gmx_fatal(FARGS,
                              "Cannot have %d separate PME ranks with just %d total ranks",
                              cr->npmenodes, cr->nnodes);
                }

                /* If the user purposely selected the number of PME nodes,
                 * only check for large primes in the PP node count.
                 */
                nnodes_div -= cr->npmenodes;
            }
        }
        else
        {
            cr->npmenodes = 0;
        }

        if (nnodes_div > 12)
        {
            ldiv = largest_divisor(nnodes_div);
            /* Check if the largest divisor is more than nnodes^(2/3) */
            if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
            {
                gmx_fatal(FARGS, "The number of ranks you selected (%d) contains a large prime factor %d. In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
                          (int)nnodes_div, (int)ldiv);
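                /* Example (illustrative): a checked rank count of 61 is
                 * prime, so ldiv = 61 and 61^3 > 61^2 triggers this error,
                 * while 60 = 2^2*3*5 has ldiv = 5 and 125 <= 3600 passes.
                 * The threshold is ldiv > nnodes^(2/3).
                 */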
            }
        }

        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes < 0)
            {
                /* Use separate PME ranks only when there are more than 18 total ranks */
                if (cr->nnodes <= 18)
                {
                    cr->npmenodes = 0;
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME ranks, as there are too few total ranks for efficient splitting\n", cr->npmenodes);
                    }
                }
                else
                {
                    cr->npmenodes = guess_npme(fplog, mtop, ir, box, cr->nnodes);
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME ranks, as guessed by mdrun\n", cr->npmenodes);
                    }
                }
            }
            else
            {
                if (fplog)
                {
                    fprintf(fplog, "Using %d separate PME ranks, per user request\n", cr->npmenodes);
                }
            }
        }

        limit = optimize_ncells(fplog, cr->nnodes, cr->npmenodes,
                                bDynLoadBal, dlb_scale,
                                mtop, box, ddbox, ir, dd,
                                cellsize_limit, cutoff_dd,
                                bInterCGBondeds,
                                dd->nc);
    }
    else
    {
        limit = 0;
    }

    /* Communicate the information set by the master to all nodes */
    gmx_bcast(sizeof(dd->nc), dd->nc, cr);
    if (EEL_PME(ir->coulombtype))
    {
        gmx_bcast(sizeof(ir->nkx), &ir->nkx, cr);
        gmx_bcast(sizeof(ir->nky), &ir->nky, cr);
        gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
    }
    else
    {
        cr->npmenodes = 0;
    }

    return limit;
}