/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include <cstdarg>
#include <cstdio>
#include <cstdlib>

#include "gromacs/commandline/filenm.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/mpiinplacebuffers.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
/* The source code in this file should be thread-safe.
   Please keep it that way. */
CommrecHandle init_commrec(MPI_Comm communicator, const gmx_multisim_t* ms)
{
    CommrecHandle handle;
    t_commrec*    cr;

    snew(cr, 1);
    handle.reset(cr);

    int rankInCommunicator, sizeOfCommunicator;
#if GMX_MPI
#    if GMX_LIB_MPI
    GMX_RELEASE_ASSERT(gmx_mpi_initialized(), "Must have initialized MPI before building commrec");
#    endif
    MPI_Comm_rank(communicator, &rankInCommunicator);
    MPI_Comm_size(communicator, &sizeOfCommunicator);
#else
    GMX_UNUSED_VALUE(communicator);
    rankInCommunicator = 0;
    sizeOfCommunicator = 1;
#endif

    if (ms != nullptr)
    {
#if GMX_MPI
        /* Split the communicator over the simulations of the multi-sim ensemble */
        cr->nnodes = sizeOfCommunicator / ms->nsim;
        MPI_Comm_split(communicator, ms->sim, rankInCommunicator, &cr->mpi_comm_mysim);
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
        MPI_Comm_rank(cr->mpi_comm_mysim, &cr->sim_nodeid);
        MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
#endif
    }
    else
    {
        cr->nnodes           = sizeOfCommunicator;
        cr->nodeid           = rankInCommunicator;
        cr->sim_nodeid       = cr->nodeid;
        cr->mpi_comm_mysim   = communicator;
        cr->mpi_comm_mygroup = communicator;
    }

    // TODO cr->duty should not be initialized here
    cr->duty = (DUTY_PP | DUTY_PME);

#if GMX_MPI && !MPI_IN_PLACE_EXISTS
    /* initialize the MPI_IN_PLACE replacement buffers */
    snew(cr->mpb, 1);
    cr->mpb->ibuf        = nullptr;
    cr->mpb->libuf       = nullptr;
    cr->mpb->fbuf        = nullptr;
    cr->mpb->dbuf        = nullptr;
    cr->mpb->ibuf_alloc  = 0;
    cr->mpb->libuf_alloc = 0;
    cr->mpb->fbuf_alloc  = 0;
    cr->mpb->dbuf_alloc  = 0;
#endif

    return handle;
}
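
/* Illustrative usage sketch (not from this file; names outside it are assumptions):
 * a runner that has already initialized MPI could build and use the commrec like
 *
 *     CommrecHandle crHandle = init_commrec(MPI_COMM_WORLD, nullptr);
 *     t_commrec*    cr       = crHandle.get();  // assumes CommrecHandle is a smart-pointer-like owner
 *
 * Passing a non-null gmx_multisim_t instead splits the communicator into one
 * group per simulation of the multi-sim ensemble.
 */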
void done_commrec(t_commrec* cr)
{
    if (MASTER(cr))
    {
        if (nullptr != cr->dd)
        {
            // TODO: implement
            // done_domdec(cr->dd);
        }
        done_mpi_in_place_buf(cr->mpb);
    }
#if GMX_MPI
    // TODO We need to be able to free communicators, but the
    // structure of the commrec and domdec initialization code makes
    // it hard to avoid both leaks and double frees.
    bool mySimIsMyGroup = (cr->mpi_comm_mysim == cr->mpi_comm_mygroup);
    if (cr->mpi_comm_mysim != MPI_COMM_NULL && cr->mpi_comm_mysim != MPI_COMM_WORLD)
    {
        // MPI_Comm_free(&cr->mpi_comm_mysim);
    }
    if (!mySimIsMyGroup && cr->mpi_comm_mygroup != MPI_COMM_NULL && cr->mpi_comm_mygroup != MPI_COMM_WORLD)
    {
        // MPI_Comm_free(&cr->mpi_comm_mygroup);
    }
#endif
    sfree(cr);
}
void gmx_setup_nodecomm(FILE gmx_unused* fplog, t_commrec* cr)
{
    gmx_nodecomm_t* nc;

    /* Many MPI implementations do not optimize MPI_Allreduce
     * (and probably also other global communication calls)
     * for multi-core nodes connected by a network.
     * We can optimize such communication by using one MPI call
     * within each node and one between the nodes.
     * For MVAPICH2 and Intel MPI this reduces the time for
     * the global_stat communication by 25%
     * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
     * B. Hess, November 2007
     */

    nc = &cr->nc;

    nc->bUse = FALSE;
#if !GMX_THREAD_MPI
#    if GMX_MPI
    int n, rank;

    // TODO PhysicalNodeCommunicator could be extended/used to handle
    // the need for per-node per-group communicators.
    MPI_Comm_size(cr->mpi_comm_mygroup, &n);
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);

    int nodehash = gmx_physicalnode_id_hash();

    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
    }

    /* The intra-node communicator, split on node number */
    MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
    MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n", rank, nc->rank_intra);
    }
    /* The inter-node communicator, split on rank_intra.
     * We actually only need the one for rank=0,
     * but it is easier to create them all.
     */
    MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
    /* Check if this really created two step communication */
    int ng, ni;

    MPI_Comm_size(nc->comm_inter, &ng);
    MPI_Comm_size(nc->comm_intra, &ni);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n", ng, ni);
    }

    if (getenv("GMX_NO_NODECOMM") == nullptr && ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
    {
        nc->bUse = TRUE;
        if (fplog)
        {
            fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
                    ng, (real)n / (real)ng);
        }
        if (nc->rank_intra > 0)
        {
            MPI_Comm_free(&nc->comm_inter);
        }
    }
    else
    {
        /* One group or all processes in a separate group, use normal summing */
        MPI_Comm_free(&nc->comm_inter);
        MPI_Comm_free(&nc->comm_intra);
        if (debug)
        {
            fprintf(debug,
                    "In gmx_setup_nodecomm: not using separate inter- and intra-node "
                    "communicators.\n");
        }
    }
#    endif
#else
    /* tMPI runs only on a single node so just use the nodeid */
    nc->rank_intra = cr->nodeid;
#endif
}
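
/* For illustration only: the two-step summing that the node communicators above
 * enable follows this generic MPI pattern. This is a sketch; comm_intra, comm_inter,
 * rankIntra, buf and n are placeholders, not names used elsewhere in this file.
 *
 *     // 1) on each physical node, reduce onto that node's intra-rank 0
 *     //    (the root uses MPI_IN_PLACE; other ranks pass their buffer as sendbuf)
 *     if (rankIntra == 0) { MPI_Reduce(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, 0, comm_intra); }
 *     else                { MPI_Reduce(buf, nullptr, n, MPI_DOUBLE, MPI_SUM, 0, comm_intra); }
 *     // 2) the node roots sum their per-node partial results across nodes
 *     if (rankIntra == 0) { MPI_Allreduce(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, comm_inter); }
 *     // 3) broadcast the global sum back to every rank of the node
 *     MPI_Bcast(buf, n, MPI_DOUBLE, 0, comm_intra);
 *
 * The gmx_sum* routines below follow this shape whenever the node communicators
 * are in use.
 */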
void gmx_barrier(MPI_Comm gmx_unused communicator)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_barrier");
#else
    MPI_Barrier(communicator);
#endif
}
void gmx_bcast(int gmx_unused nbytes, void gmx_unused* b, MPI_Comm gmx_unused communicator)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_bcast");
#else
    MPI_Bcast(b, nbytes, MPI_BYTE, 0, communicator);
#endif
}
void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumd");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        if (cr->nc.rank_intra == 0)
        {
            /* Use two step summing. */
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers. */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->dbuf_alloc)
    {
        cr->mpb->dbuf_alloc = nr;
        srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->dbuf[i];
        }
    }
#    endif
#endif
}
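
/* Illustrative usage sketch (hypothetical caller, values made up):
 *
 *     double partialEnergies[2] = { localLJ, localCoulomb };  // per-rank contributions
 *     gmx_sumd(2, partialEnergies, cr);
 *     // partialEnergies now holds the sums over all ranks of cr->mpi_comm_mygroup
 *
 * gmx_sumf, gmx_sumi and gmx_sumli below follow the same calling convention for
 * float, int and int64_t arrays, respectively.
 */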
void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumf");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing.  */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->fbuf_alloc)
    {
        cr->mpb->fbuf_alloc = nr;
        srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->fbuf[i];
        }
    }
#    endif
#endif
}
void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumi");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->ibuf_alloc)
    {
        cr->mpb->ibuf_alloc = nr;
        srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->ibuf[i];
        }
    }
#    endif
#endif
}
void gmx_sumli(int gmx_unused nr, int64_t gmx_unused r[], const t_commrec gmx_unused* cr)
{
#if !GMX_MPI
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumli");
#else
#    if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
    }
#    else
    int i;

    if (nr > cr->mpb->libuf_alloc)
    {
        cr->mpb->libuf_alloc = nr;
        srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->libuf[i];
        }
    }
#    endif
#endif
}
const char* opt2fn_master(const char* opt, int nfile, const t_filenm fnm[], t_commrec* cr)
{
    return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
}
void gmx_fatal_collective(int                    f_errno,
                          const char*            file,
                          int                    line,
                          MPI_Comm               comm,
                          gmx_bool               bMaster,
                          gmx_fmtstr const char* fmt,
                          ...)
{
    va_list  ap;
    gmx_bool bFinalize;
#if GMX_MPI
    int result;
    /* Check if we are calling on all processes in MPI_COMM_WORLD */
    MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
    /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
    bFinalize = (result != MPI_UNEQUAL);
#else
    GMX_UNUSED_VALUE(comm);
    bFinalize = TRUE;
#endif

    va_start(ap, fmt);
    gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
    va_end(ap);
}