/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include <assert.h>
#include <math.h>
#include <stdio.h>

#include "types/commrec.h"
#include "gromacs/utility/smalloc.h"

/* Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05
/* Factorize n into primes; returns the number of distinct factors.
 * *fac and *mfac are allocated here and hold the factors and their multiplicities.
 */
static int factorize(int n, int **fac, int **mfac)
{
    int d, ndiv;

    if (n <= 0)
    {
        gmx_fatal(FARGS, "Can only factorize positive integers.");
    }

    /* Decompose n in factors */
    snew(*fac, n/2);
    snew(*mfac, n/2);
    d    = 2;
    ndiv = 0;
    while (n > 1)
    {
        while (n % d == 0)
        {
            if (ndiv == 0 || (*fac)[ndiv-1] != d)
            {
                ndiv++;
                (*fac)[ndiv-1] = d;
            }
            (*mfac)[ndiv-1]++;
            n /= d;
        }
        d++;
    }

    return ndiv;
}
/* Return the largest prime factor of n */
static int largest_divisor(int n)
{
    int ndiv, *div, *mdiv, ldiv;

    ndiv = factorize(n, &div, &mdiv);
    ldiv = div[ndiv-1];
    sfree(div);
    sfree(mdiv);

    return ldiv;
}
/* Return the largest common divisor of n1 and n2 */
static int lcd(int n1, int n2)
{
    int d, i;

    d = 1;
    for (i = 2; (i <= n1 && i <= n2); i++)
    {
        if (n1 % i == 0 && n2 % i == 0)
        {
            d = i;
        }
    }

    return d;
}
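/* A worked example of lcd() above (illustrative numbers, not from the
 * original source): lcd(48, 16) returns 16 and lcd(9, 6) returns 3, i.e.
 * the greatest common divisor found by simple trial division.
 */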
static gmx_bool fits_pme_ratio(int nnodes, int npme, float ratio)
{
    return ((double)npme/(double)nnodes > 0.95*ratio);
}
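/* Illustration of the ratio check above (example numbers only): with
 * 64 nodes and an estimated PME load fraction of 0.25, npme/nnodes must
 * exceed 0.95*0.25 = 0.2375, so at least 16 of the 64 nodes have to be
 * dedicated to PME for the split to "fit".
 */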
static gmx_bool fits_pp_pme_perf(int nnodes, int npme, float ratio)
{
    int ndiv, *div, *mdiv, ldiv;
    int npp_root3, npme_root2;

    ndiv = factorize(nnodes-npme, &div, &mdiv);
    ldiv = div[ndiv-1];
    sfree(div);
    sfree(mdiv);

    npp_root3  = (int)(pow(nnodes-npme, 1.0/3.0) + 0.5);
    npme_root2 = (int)(sqrt(npme) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP nodes,
     * factor 7 allowed at 49 or more PP nodes.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME nodes have a reasonably sized
     * common divisor, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(nnodes-npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(nnodes, npme, ratio);
}
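/* A rough worked example of the prime-factor check in fits_pp_pme_perf()
 * (illustrative numbers, not from the original source): with 5 PP nodes,
 * npp_root3 = (int)(pow(5, 1.0/3.0) + 0.5) = 2, so prime factors up to
 * 3 + 2 = 5 are accepted; with 49 PP nodes, npp_root3 = 4, so factors up
 * to 7 are accepted. This matches the "factor 5 at 5 PP nodes, factor 7
 * at 49 PP nodes" rule stated in the comment above.
 */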
static int guess_npme(FILE *fplog, gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
                      int nnodes)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    if (fplog)
    {
        fprintf(fplog, "Guess for relative PME load: %.2f\n", ratio);
    }
    /* We assume the optimal node ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */
    if (!fits_pme_ratio(nnodes, nnodes/2, ratio))
    {
        /* We would need more than nnodes/2 PME-only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all nodes do PME.
         */
        return 0;
    }
    /* First try to find npme as a factor of nnodes up to nnodes/3.
     * We start with a minimum PME node fraction of 1/16
     * and avoid ratios which lead to large prime factors in nnodes-npme.
     */
    npme = (nnodes + 15)/16;
    while (npme <= nnodes/3)
    {
        if (nnodes % npme == 0)
        {
            /* Note that fits_perf might change the PME grid,
             * in the current implementation it does not.
             */
            if (fits_pp_pme_perf(nnodes, npme, ratio))
            {
                break;
            }
        }
        npme++;
    }
    if (npme > nnodes/3)
    {
        /* Try any possible number for npme */
        while (npme <= nnodes/2)
        {
            /* Note that fits_perf may change the PME grid */
            if (fits_pp_pme_perf(nnodes, npme, ratio))
            {
                break;
            }
            npme++;
        }
    }
    if (npme > nnodes/2)
    {
        gmx_fatal(FARGS, "Could not find an appropriate number of separate PME nodes, i.e. >= %5f*#nodes (%d) and <= #nodes/2 (%d), that also gives reasonable performance (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of processors or the PME grid dimensions, see the manual for details.",
                  ratio, (int)(0.95*ratio*nnodes+0.5), nnodes/2, ir->nkx, ir->nky);
        /* Keep the compiler happy */
        npme = 0;
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog,
                    "Will use %d particle-particle and %d PME only nodes\n"
                    "This is a guess, check the performance at the end of the log file\n",
                    nnodes-npme, npme);
        }
        fprintf(stderr, "\n"
                "Will use %d particle-particle and %d PME only nodes\n"
                "This is a guess, check the performance at the end of the log file\n",
                nnodes-npme, npme);
    }

    return npme;
}
static int div_up(int n, int f)
{
    return (n + f - 1)/f;
}
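/* div_up() is integer division rounded up, e.g. div_up(65, 4) == 17. */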
/* Estimate the communication volume, as a fraction of the home cell volume,
 * for decomposition dd_nc with the given cut-off.
 */
real comm_box_frac(ivec dd_nc, real cutoff, gmx_ddbox_t *ddbox)
{
    int  i, j, k;
    rvec bt, nw;
    real comm_vol;

    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];
        nw[i] = dd_nc[i]*cutoff/bt[i];
    }

    comm_vol = 0;
    for (i = 0; i < DIM; i++)
    {
        if (dd_nc[i] > 1)
        {
            comm_vol += nw[i];
            for (j = i+1; j < DIM; j++)
            {
                if (dd_nc[j] > 1)
                {
                    comm_vol += nw[i]*nw[j]*M_PI/4;
                    for (k = j+1; k < DIM; k++)
                    {
                        if (dd_nc[k] > 1)
                        {
                            comm_vol += nw[i]*nw[j]*nw[k]*M_PI/6;
                        }
                    }
                }
            }
        }
    }

    return comm_vol;
}
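/* Geometry behind the terms above (a sketch, not from the original source):
 * nw[i] is the cut-off expressed in units of the cell size along dimension i.
 * A decomposed dimension contributes a slab of relative volume nw[i], each
 * pair of decomposed dimensions adds a quarter cylinder, pi/4*nw[i]*nw[j],
 * along their shared edge, and three decomposed dimensions add an eighth of
 * a sphere, pi/6*nw[i]*nw[j]*nw[k], at the corner.
 */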
static gmx_bool inhomogeneous_z(const t_inputrec *ir)
{
    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
}
/* Avoid integer overflows */
static float comm_pme_cost_vol(int npme, int a, int b, int c)
{
    float comm_vol;

    comm_vol  = npme - 1;
    comm_vol *= npme;
    comm_vol *= div_up(a, npme);
    comm_vol *= div_up(b, npme);
    comm_vol *= c;

    return comm_vol;
}
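/* Why float accumulation (a sketch, assuming the reconstructed expression
 * above): the product formed here can be of the order of the full FFT grid
 * volume a*b*c, which for large grids exceeds the range of a 32-bit int,
 * hence the "avoid integer overflows" comment and the float intermediate.
 */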
/* Estimate the (relative) communication cost for decomposition nc combined
 * with npme_tot separate PME nodes; returns -1 when nc violates a
 * decomposition restriction.
 */
static float comm_cost_est(real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           int natoms, t_inputrec *ir,
                           float pbcdxr,
                           int npme_tot, ivec nc)
{
    ivec  npme = {1, 1, 1};
    int   i, j, k, nk, overlap;
    rvec  bt;
    float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
    float temp;
    /* This is the cost of a pbc_dx call relative to the cost
     * of communicating the coordinate and force of an atom.
     * This will be machine dependent.
     * These factors are for x86 with SMP or Infiniband.
     */
    float pbcdx_rect_fac = 0.1;
    float pbcdx_tric_fac = 0.2;
    /* Check the DD algorithm restrictions */
    if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) ||
        (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox->npbcdim <= DIM);
    /* Check if the triclinic requirements are met */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < ddbox->npbcdim; j++)
        {
            if (box[j][i] != 0 || ir->deform[j][i] != 0 ||
                (ir->epc != epcNO && ir->compress[j][i] != 0))
            {
                if (nc[j] > 1 && nc[i] == 1)
                {
                    return -1;
                }
            }
        }
    }
    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];

        /* Without PBC and with 2 cells, there are no lower limits on the cell size */
        if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
        {
            return -1;
        }

        /* With PBC, check if the cut-off fits in nc[i]-1 cells */
        if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
        {
            return -1;
        }
    }
    if (npme_tot > 1)
    {
        /* The following choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[XX] == 1 && nc[YY] > 1)
        {
            npme[XX] = 1;
            npme[YY] = npme_tot;
        }
        else if (nc[YY] == 1)
        {
            npme[XX] = npme_tot;
            npme[YY] = 1;
        }
        else
        {
            /* Will we use 1D or 2D PME decomposition? */
            npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot;
            npme[YY] = npme_tot/npme[XX];
        }
    }
    /* When two dimensions are (nearly) equal, use more cells
     * for the smallest index, so the decomposition does not
     * depend sensitively on the rounding of the box elements.
     */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < DIM; j++)
        {
            /* Check if the box size is nearly identical,
             * in that case we prefer nx > ny and ny > nz.
             */
            if (fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i])
            {
                /* The XX/YY check is a bit compact. If nc[YY]==npme[YY]
                 * this means the swapped nc has nc[XX]==npme[XX],
                 * and we can also swap X and Y for PME.
                 */
                /* Check if dimensions i and j are equivalent for PME.
                 * For x/y: if nc[YY]!=npme[YY], we cannot swap x/y.
                 * For y/z: we cannot have PME decomposition in z.
                 */
                if (npme_tot <= 1 ||
                    !((i == XX && j == YY && nc[YY] != npme[YY]) ||
                      (i == YY && j == ZZ && npme[YY] > 1)))
                {
                    return -1;
                }
            }
        }
    }
    /* This function determines only half of the communication cost.
     * All PP, PME and PP-PME communication is symmetric
     * and the "back"-communication cost is identical to the forward cost.
     */

    comm_vol = comm_box_frac(nc, cutoff, ddbox);
    comm_pme = 0;
    for (i = 0; i < 2; i++)
    {
        /* Determine the largest volume for PME x/f redistribution */
        if (nc[i] % npme[i] != 0)
        {
            if (nc[i] > npme[i])
            {
                comm_vol_xf = (npme[i] == 2 ? 1.0/3.0 : 0.5);
            }
            else
            {
                comm_vol_xf = 1.0 - lcd(nc[i], npme[i])/(double)npme[i];
            }
            comm_pme += 3*natoms*comm_vol_xf;
        }
        /* Grid overlap communication */
        if (npme[i] > 1)
        {
            nk      = (i == 0 ? ir->nkx : ir->nky);
            overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order);
            temp    = npme[i];
            temp   *= overlap;
            temp   *= ir->nkx;
            temp   *= ir->nky;
            temp   *= ir->nkz;
            temp   /= nk;
            comm_pme += temp;
            /* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */
        }
    }
    /* PME FFT communication volume.
     * This only takes the communication into account and not imbalance
     * in the calculation. But the imbalance in communication and calculation
     * are similar and therefore these formulas also prefer load balance
     * in the FFT and pme_solve calculation.
     */
    comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx);
    comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz);
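    /* A small worked example of the two calls above (illustrative numbers,
     * assuming the comm_pme_cost_vol() reconstruction earlier in this file):
     * for a 64x64x64 PME grid decomposed over npme = {4, 2},
     * comm_pme_cost_vol(2, 64, 64, 64) = 1*2*32*32*64 = 131072 and
     * comm_pme_cost_vol(4, 64, 64, 64) = 3*4*16*16*64 = 196608.
     */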
    /* Add cost of pbc_dx for bondeds */
    cost_pbcdx = 0;
    if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
    {
        if ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
            (ddbox->tric_dir[YY] && nc[YY] == 1))
        {
            cost_pbcdx = pbcdxr*pbcdx_tric_fac;
        }
        else
        {
            cost_pbcdx = pbcdxr*pbcdx_rect_fac;
        }
    }
495 "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
496 nc
[XX
], nc
[YY
], nc
[ZZ
], npme
[XX
], npme
[YY
],
497 comm_vol
, cost_pbcdx
, comm_pme
,
498 3*natoms
*(comm_vol
+ cost_pbcdx
) + comm_pme
);
    return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}
/* Recursively assign the prime factors in div/mdiv to the x, y and z
 * dimensions of ir_try, and store the grid with the lowest estimated
 * communication cost in opt.
 */
static void assign_factors(gmx_domdec_t *dd,
                           real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           int natoms, t_inputrec *ir,
                           float pbcdxr, int npme,
                           int ndiv, int *div, int *mdiv, ivec ir_try, ivec opt)
{
    int   x, y, i;
    float ce;

    if (ndiv == 0)
    {
        ce = comm_cost_est(limit, cutoff, box, ddbox,
                           natoms, ir, pbcdxr, npme, ir_try);
        if (ce >= 0 && (opt[XX] == 0 ||
                        ce < comm_cost_est(limit, cutoff, box, ddbox,
                                           natoms, ir, pbcdxr, npme, opt)))
        {
            copy_ivec(ir_try, opt);
        }

        return;
    }
    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0]-x; y >= 0; y--)
        {
            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            assign_factors(dd, limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
                           ndiv-1, div+1, mdiv+1, ir_try, opt);

            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}
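/* A sketch of what the recursion above enumerates (illustrative, not from the
 * original source): for npp = 12 = 2^2 * 3, factorize() yields div = {2, 3},
 * mdiv = {2, 1}, and assign_factors() tries every distribution of these
 * factors over x, y and z, i.e. grids such as 12x1x1, 6x2x1, 4x3x1, 4x1x3,
 * 2x2x3, 1x4x3, ..., keeping the one with the lowest comm_cost_est().
 */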
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot, int npme_only,
                            gmx_bool bDynLoadBal, real dlb_scale,
                            gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                            t_inputrec *ir, gmx_domdec_t *dd,
                            real cellsize_limit, real cutoff,
                            gmx_bool bInterCGBondeds,
                            ivec nc)
{
    int      npp, npme, ndiv, *div, *mdiv, d, nmax;
    gmx_bool bExcl_pbcdx;
    float    pbcdxr;
    real     limit;
    ivec     itry;
    limit = cellsize_limit;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (bInterCGBondeds)
    {
        /* For Ewald exclusions pbc_dx is not called */
        bExcl_pbcdx =
            (IR_EXCL_FORCES(*ir) && !EEL_FULL(ir->coulombtype));
        pbcdxr = (double)n_bonded_dx(mtop, bExcl_pbcdx)/(double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }
    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE);
        }
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }
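    /* Margin arithmetic, with example numbers (illustrative only): a minimum
     * cell size of 0.9 nm becomes 0.9/0.8 = 1.125 nm with -dds 0.8 and
     * dynamic load balancing, or 0.9*1.05 = 0.945 nm when only pressure
     * coupling is active.
     */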
    if (fplog)
    {
        fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit);

        if (inhomogeneous_z(ir))
        {
            fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]);
        }

        fprintf(fplog, "The maximum allowed number of cells is:");
        for (d = 0; d < DIM; d++)
        {
            nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
            if (d >= ddbox->npbcdim && nmax < 2)
            {
                nmax = 2;
            }
            if (d == ZZ && inhomogeneous_z(ir))
            {
                nmax = 1;
            }
            fprintf(fplog, " %c %d", 'X' + d, nmax);
        }
        fprintf(fplog, "\n");
    }
    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }
    /* Decompose npp in factors */
    ndiv = factorize(npp, &div, &mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
                   npme, ndiv, div, mdiv, itry, nc);

    sfree(div);
    sfree(mdiv);

    return limit;
}
real dd_choose_grid(FILE *fplog,
                    t_commrec *cr, gmx_domdec_t *dd, t_inputrec *ir,
                    gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                    gmx_bool bDynLoadBal, real dlb_scale,
                    real cellsize_limit, real cutoff_dd,
                    gmx_bool bInterCGBondeds)
{
    gmx_int64_t nnodes_div, ldiv;
    real        limit = -1;

    if (MASTER(cr))
    {
        nnodes_div = cr->nnodes;
        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes > 0)
            {
                if (cr->nnodes <= 2)
                {
                    gmx_fatal(FARGS,
                              "Cannot have separate PME nodes with 2 or fewer nodes");
                }
                if (cr->npmenodes >= cr->nnodes)
                {
                    gmx_fatal(FARGS,
                              "Cannot have %d separate PME nodes with just %d total nodes",
                              cr->npmenodes, cr->nnodes);
                }

                /* If the user purposely selected the number of PME nodes,
                 * only check for large primes in the PP node count.
                 */
                nnodes_div -= cr->npmenodes;
            }
        }
        else
        {
            cr->npmenodes = 0;
        }
        ldiv = largest_divisor(nnodes_div);
        /* Check if the largest divisor is more than nnodes^2/3 */
        if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
        {
            gmx_fatal(FARGS, "The number of nodes you selected (%d) contains a large prime factor %d. In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
                      (int)nnodes_div, (int)ldiv);
        }
        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes < 0)
            {
                /* Use separate PME nodes when the total number of nodes is more than 18 */
                if (cr->nnodes <= 18)
                {
                    cr->npmenodes = 0;
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME nodes, as there are too few total nodes for efficient splitting\n", cr->npmenodes);
                    }
                }
                else
                {
                    cr->npmenodes = guess_npme(fplog, mtop, ir, box, cr->nnodes);
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME nodes, as guessed by mdrun\n", cr->npmenodes);
                    }
                }
            }
            else
            {
                if (fplog)
                {
                    fprintf(fplog, "Using %d separate PME nodes, per user request\n", cr->npmenodes);
                }
            }
        }
        limit = optimize_ncells(fplog, cr->nnodes, cr->npmenodes,
                                bDynLoadBal, dlb_scale,
                                mtop, box, ddbox, ir, dd,
                                cellsize_limit, cutoff_dd,
                                bInterCGBondeds,
                                dd->nc);
    }
    /* Communicate the information set by the master to all nodes */
    gmx_bcast(sizeof(dd->nc), dd->nc, cr);
    if (EEL_PME(ir->coulombtype))
    {
        gmx_bcast(sizeof(ir->nkx), &ir->nkx, cr);
        gmx_bcast(sizeof(ir->nky), &ir->nky, cr);
        gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
    }

    return limit;
}