/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include <cstdarg>
#include <cstdio>
#include <cstdlib>

#include "gromacs/commandline/filenm.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/mpiinplacebuffers.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
/* The source code in this file should be thread-safe.
   Please keep it that way. */
CommrecHandle init_commrec(MPI_Comm communicator, const gmx_multisim_t* ms)
{
    CommrecHandle handle;
    t_commrec*    cr;

    snew(cr, 1);
    handle.reset(cr);

    int rankInCommunicator, sizeOfCommunicator;
#if GMX_MPI
#    if GMX_LIB_MPI
    GMX_RELEASE_ASSERT(gmx_mpi_initialized(), "Must have initialized MPI before building commrec");
#    endif
    MPI_Comm_rank(communicator, &rankInCommunicator);
    MPI_Comm_size(communicator, &sizeOfCommunicator);
#else
    GMX_UNUSED_VALUE(communicator);
    rankInCommunicator = 0;
    sizeOfCommunicator = 1;
#endif

    if (ms != nullptr)
    {
#if GMX_MPI
        /* Split the communicator over the simulations of the multi-sim ensemble */
        cr->nnodes = sizeOfCommunicator / ms->nsim;
        MPI_Comm_split(communicator, ms->sim, rankInCommunicator, &cr->mpi_comm_mysim);
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
        MPI_Comm_rank(cr->mpi_comm_mysim, &cr->sim_nodeid);
        MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
#endif
    }
    else
    {
        cr->nnodes           = sizeOfCommunicator;
        cr->nodeid           = rankInCommunicator;
        cr->sim_nodeid       = cr->nodeid;
        cr->mpi_comm_mysim   = communicator;
        cr->mpi_comm_mygroup = communicator;
    }

    // TODO cr->duty should not be initialized here
    cr->duty = (DUTY_PP | DUTY_PME);

#if GMX_MPI && !MPI_IN_PLACE_EXISTS
    /* initialize the MPI_IN_PLACE replacement buffers */
    snew(cr->mpb, 1);
    cr->mpb->ibuf        = nullptr;
    cr->mpb->libuf       = nullptr;
    cr->mpb->fbuf        = nullptr;
    cr->mpb->dbuf        = nullptr;
    cr->mpb->ibuf_alloc  = 0;
    cr->mpb->libuf_alloc = 0;
    cr->mpb->fbuf_alloc  = 0;
    cr->mpb->dbuf_alloc  = 0;
#endif

    return handle;
}
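
/* Illustrative usage sketch (not from this file; names outside it are assumptions):
 * a runner that has already initialized MPI could build and use the commrec like
 *
 *     CommrecHandle crHandle = init_commrec(MPI_COMM_WORLD, nullptr);
 *     t_commrec*    cr       = crHandle.get();  // assumes CommrecHandle is a smart-pointer-like owner
 *
 * Passing a non-null gmx_multisim_t instead splits the communicator into one
 * group per simulation of the multi-sim ensemble.
 */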
void done_commrec(t_commrec* cr)
{
    if (MASTER(cr))
    {
        if (nullptr != cr->dd)
        {
            // TODO: implement
            // done_domdec(cr->dd);
        }
        done_mpi_in_place_buf(cr->mpb);
    }
#if GMX_MPI
    // TODO We need to be able to free communicators, but the
    // structure of the commrec and domdec initialization code makes
    // it hard to avoid both leaks and double frees.
    bool mySimIsMyGroup = (cr->mpi_comm_mysim == cr->mpi_comm_mygroup);
    if (cr->mpi_comm_mysim != MPI_COMM_NULL && cr->mpi_comm_mysim != MPI_COMM_WORLD)
    {
        // MPI_Comm_free(&cr->mpi_comm_mysim);
    }
    if (!mySimIsMyGroup && cr->mpi_comm_mygroup != MPI_COMM_NULL && cr->mpi_comm_mygroup != MPI_COMM_WORLD)
    {
        // MPI_Comm_free(&cr->mpi_comm_mygroup);
    }
#endif
    sfree(cr);
}
void gmx_setup_nodecomm(FILE gmx_unused* fplog, t_commrec* cr)
{
    gmx_nodecomm_t* nc;

    /* Many MPI implementations do not optimize MPI_Allreduce
     * (and probably also other global communication calls)
     * for multi-core nodes connected by a network.
     * We can optimize such communication by using one MPI call
     * within each node and one between the nodes.
     * For MVAPICH2 and Intel MPI this reduces the time for
     * the global_stat communication by 25%
     * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
     * B. Hess, November 2007
     */

    nc = &cr->nc;

    nc->bUse = FALSE;
#if !GMX_THREAD_MPI
#    if GMX_MPI
    int n, rank;

    // TODO PhysicalNodeCommunicator could be extended/used to handle
    // the need for per-node per-group communicators.
    MPI_Comm_size(cr->mpi_comm_mygroup, &n);
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);

    int nodehash = gmx_physicalnode_id_hash();

    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
    }

    /* The intra-node communicator, split on node number */
    MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
    MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n", rank, nc->rank_intra);
    }
    /* The inter-node communicator, split on rank_intra.
     * We actually only need the one for rank=0,
     * but it is easier to create them all.
     */
    MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
    /* Check if this really created two step communication */
    int ng, ni;

    MPI_Comm_size(nc->comm_inter, &ng);
    MPI_Comm_size(nc->comm_intra, &ni);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n", ng, ni);
    }

    if (getenv("GMX_NO_NODECOMM") == nullptr && ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
    {
        nc->bUse = TRUE;
        if (fplog)
        {
            fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
                    ng, (real)n / (real)ng);
        }
        if (nc->rank_intra > 0)
        {
            MPI_Comm_free(&nc->comm_inter);
        }
    }
    else
    {
        /* One group or all processes in a separate group, use normal summing */
        MPI_Comm_free(&nc->comm_inter);
        MPI_Comm_free(&nc->comm_intra);
        if (debug)
        {
            fprintf(debug,
                    "In gmx_setup_nodecomm: not using separate inter- and intra-node "
                    "communicators.\n");
        }
    }
#    endif
#else
    /* tMPI runs only on a single node so just use the nodeid */
    nc->rank_intra = cr->nodeid;
#endif
}
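
/* For illustration only: the two-step summing that the node communicators above
 * enable follows this generic MPI pattern. This is a sketch; comm_intra, comm_inter,
 * rankIntra, buf and n are placeholders, not names used elsewhere in this file.
 *
 *     // 1) on each physical node, reduce onto that node's intra-rank 0
 *     //    (the root uses MPI_IN_PLACE; other ranks pass their buffer as sendbuf)
 *     if (rankIntra == 0) { MPI_Reduce(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, 0, comm_intra); }
 *     else                { MPI_Reduce(buf, nullptr, n, MPI_DOUBLE, MPI_SUM, 0, comm_intra); }
 *     // 2) the node roots sum their per-node partial results across nodes
 *     if (rankIntra == 0) { MPI_Allreduce(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, comm_inter); }
 *     // 3) broadcast the global sum back to every rank of the node
 *     MPI_Bcast(buf, n, MPI_DOUBLE, 0, comm_intra);
 *
 * The gmx_sum* routines below follow this shape whenever the node communicators
 * are in use.
 */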
void gmx_barrier(MPI_Comm gmx_unused communicator)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_barrier");
#else
    MPI_Barrier(communicator);
#endif
}
void gmx_bcast(int gmx_unused nbytes, void gmx_unused* b, MPI_Comm gmx_unused communicator)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_bcast");
#else
    MPI_Bcast(b, nbytes, MPI_BYTE, 0, communicator);
#endif
}
void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumd");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        if (cr->nc.rank_intra == 0)
        {
            /* Use two step summing. */
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers. */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->dbuf_alloc)
    {
        cr->mpb->dbuf_alloc = nr;
        srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->dbuf[i];
        }
    }
#    endif
#endif
}
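
/* Illustrative usage sketch (hypothetical caller, values made up):
 *
 *     double partialEnergies[2] = { localLJ, localCoulomb };  // per-rank contributions
 *     gmx_sumd(2, partialEnergies, cr);
 *     // partialEnergies now holds the sums over all ranks of cr->mpi_comm_mygroup
 *
 * gmx_sumf, gmx_sumi and gmx_sumli below follow the same calling convention for
 * float, int and int64_t arrays, respectively.
 */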
void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumf");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing.  */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->fbuf_alloc)
    {
        cr->mpb->fbuf_alloc = nr;
        srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->fbuf[i];
        }
    }
#    endif
#endif
}
void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumi");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->ibuf_alloc)
    {
        cr->mpb->ibuf_alloc = nr;
        srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->ibuf[i];
        }
    }
#    endif
#endif
}
void gmx_sumli(int gmx_unused nr, int64_t gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumli");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->libuf_alloc)
    {
        cr->mpb->libuf_alloc = nr;
        srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->libuf[i];
        }
    }
#    endif
#endif
}
const char* opt2fn_master(const char* opt, int nfile, const t_filenm fnm[], t_commrec* cr)
{
    return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
}
void gmx_fatal_collective(int                    f_errno,
                          const char*            file,
                          int                    line,
                          MPI_Comm               comm,
                          gmx_bool               bMaster,
                          gmx_fmtstr const char* fmt,
                          ...)
{
    va_list  ap;
    gmx_bool bFinalize;
#if GMX_MPI
    int result;
    /* Check if we are calling on all processes in MPI_COMM_WORLD */
    MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
    /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
    bFinalize = (result != MPI_UNEQUAL);
#else
    GMX_UNUSED_VALUE(comm);
    bFinalize = TRUE;
#endif

    va_start(ap, fmt);
    gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
    va_end(ap);
}