Activate PME on GPUs
[gromacs.git] / src / gromacs / gmxlib / network.cpp
blob d35662d81e9b7d52d9a73fce8ac5a8bada007134
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "network.h"

#include "config.h"

#include <cctype>
#include <cstdarg>
#include <cstdlib>
#include <cstring>

#include "gromacs/commandline/filenm.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

/* The source code in this file should be thread-safe.
      Please keep it that way. */

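/* Fill the MPI-derived fields of the communication record: rank count and
 * rank id, the simulation and group communicators (both MPI_COMM_WORLD
 * here) and, when running in parallel or multi-simulation mode, a
 * communicator spanning the ranks on this physical node. */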
void gmx_fill_commrec_from_mpi(t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_fill_commrec_from_mpi");
#else
    if (!gmx_mpi_initialized())
    {
        gmx_comm("MPI has not been initialized properly");
    }

    cr->nnodes           = gmx_node_num();
    cr->nodeid           = gmx_node_rank();
    // TODO This communicator should be always available. Currently we
    // make it multiple times, and keep it only when relevant. But the
    // cost of an extra communicator is negligible in the single-node
    // case (both thread-MPI and real MPI), and we need it in
    // all multi-node MPI cases with more than one PP rank per node,
    // with and without GPUs. By always having it available, we also
    // don't need to protect calls to mpi_comm_physicalnode, etc.
    if (PAR(cr) || MULTISIM(cr))
    {
        MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->nodeid, &cr->mpi_comm_physicalnode);
    }
    cr->sim_nodeid       = cr->nodeid;
    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
    cr->mpi_comm_mygroup = MPI_COMM_WORLD;
#endif
}

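/* Allocate and initialize a communication record for this process. With a
 * library-MPI build the fields are filled from MPI; otherwise a
 * single-rank record is set up. */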
t_commrec *init_commrec()
{
    t_commrec *cr;

    snew(cr, 1);

#if GMX_LIB_MPI
    gmx_fill_commrec_from_mpi(cr);
#else
    cr->mpi_comm_mysim   = nullptr;
    cr->mpi_comm_mygroup = nullptr;
    cr->nnodes           = 1;
    cr->sim_nodeid       = 0;
    cr->nodeid           = cr->sim_nodeid;
#endif

    // TODO cr->duty should not be initialized here
    cr->duty = (DUTY_PP | DUTY_PME);

#if GMX_MPI && !MPI_IN_PLACE_EXISTS
    /* initialize the MPI_IN_PLACE replacement buffers */
    snew(cr->mpb, 1);
    cr->mpb->ibuf        = nullptr;
    cr->mpb->libuf       = nullptr;
    cr->mpb->fbuf        = nullptr;
    cr->mpb->dbuf        = nullptr;
    cr->mpb->ibuf_alloc  = 0;
    cr->mpb->libuf_alloc = 0;
    cr->mpb->fbuf_alloc  = 0;
    cr->mpb->dbuf_alloc  = 0;
#endif

    return cr;
}

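/* Free the MPI_IN_PLACE replacement buffers, if they were allocated. */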
static void done_mpi_in_place_buf(mpi_in_place_buf_t *buf)
{
    if (nullptr != buf)
    {
        sfree(buf->ibuf);
        sfree(buf->libuf);
        sfree(buf->fbuf);
        sfree(buf->dbuf);
        sfree(buf);
    }
}

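/* Free the communication record and the resources it owns: the
 * physical-node communicator, the multi-simulation record and the
 * MPI_IN_PLACE replacement buffers. */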
void done_commrec(t_commrec *cr)
{
#if GMX_MPI
    if (PAR(cr) || MULTISIM(cr))
    {
        MPI_Comm_free(&cr->mpi_comm_physicalnode);
    }
#endif
    if (nullptr != cr->dd)
    {
        // TODO: implement
        // done_domdec(cr->dd);
    }
    if (nullptr != cr->ms)
    {
        done_mpi_in_place_buf(cr->ms->mpb);
        sfree(cr->ms);
    }
    done_mpi_in_place_buf(cr->mpb);
    sfree(cr);
}

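/* With thread-MPI, make a copy of the commrec for this thread and refill
 * its rank-specific fields; with real MPI this returns nullptr. */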
t_commrec *reinitialize_commrec_for_this_thread(const t_commrec gmx_unused *cro)
{
#if GMX_THREAD_MPI
    t_commrec *cr;

    /* make a thread-specific commrec */
    snew(cr, 1);
    /* now copy the whole thing, so settings like the number of PME nodes
       get propagated. */
    *cr = *cro;

    /* and we start setting our own thread-specific values for things */
    gmx_fill_commrec_from_mpi(cr);

    // TODO cr->duty should not be initialized here
    cr->duty = (DUTY_PP | DUTY_PME);

    return cr;
#else
    return nullptr;
#endif
}

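/* Set up the intra-node and inter-node communicators used for two-step
 * summing (reduce within each physical node first, then across nodes),
 * which is cheaper than a flat MPI_Allreduce on many MPI implementations.
 * Setting the environment variable GMX_NO_NODECOMM disables this. */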
void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
{
    gmx_nodecomm_t *nc;

    /* Many MPI implementations do not optimize MPI_Allreduce
     * (and probably also other global communication calls)
     * for multi-core nodes connected by a network.
     * We can optimize such communication by using one MPI call
     * within each node and one between the nodes.
     * For MVAPICH2 and Intel MPI this reduces the time for
     * the global_stat communication by 25%
     * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
     * B. Hess, November 2007
     */

    nc = &cr->nc;

    nc->bUse = FALSE;
#if !GMX_THREAD_MPI
#if GMX_MPI
    int n, rank;

    MPI_Comm_size(cr->mpi_comm_mygroup, &n);
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);

    int nodehash = gmx_physicalnode_id_hash();

    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
    }

    /* The intra-node communicator, split on node number */
    MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
    MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
                rank, nc->rank_intra);
    }
    /* The inter-node communicator, split on rank_intra.
     * We actually only need the one for rank=0,
     * but it is easier to create them all.
     */
    MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
    /* Check if this really created two step communication */
    int ng, ni;

    MPI_Comm_size(nc->comm_inter, &ng);
    MPI_Comm_size(nc->comm_intra, &ni);
    if (debug)
    {
        fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
                ng, ni);
    }

    if (getenv("GMX_NO_NODECOMM") == nullptr &&
        ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
    {
        nc->bUse = TRUE;
        if (fplog)
        {
            fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
                    ng, (real)n/(real)ng);
        }
        if (nc->rank_intra > 0)
        {
            MPI_Comm_free(&nc->comm_inter);
        }
    }
    else
    {
        /* One group or all processes in a separate group, use normal summing */
        MPI_Comm_free(&nc->comm_inter);
        MPI_Comm_free(&nc->comm_intra);
        if (debug)
        {
            fprintf(debug, "In gmx_setup_nodecomm: not using separate inter- and intra-node communicators.\n");
        }
    }
#endif
#else
    /* tMPI runs only on a single node so just use the nodeid */
    nc->rank_intra = cr->nodeid;
#endif
}

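/* Count the ranks and PP ranks that share this physical node, and this
 * rank's index among them, and store the results in the commrec. Physical
 * nodes are identified by the hash from gmx_physicalnode_id_hash(). */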
void gmx_init_intranode_counters(t_commrec *cr)
{
    /* counters for PP+PME and PP-only processes on my physical node */
    int nrank_intranode, rank_intranode;
    int nrank_pp_intranode, rank_pp_intranode;
    /* thread-MPI is not initialized when not running in parallel */
#if GMX_MPI && !GMX_THREAD_MPI
    int nrank_world, rank_world;
    int i, myhash, *hash, *hash_s, *hash_pp, *hash_pp_s;

    MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);

    /* Get a (hopefully unique) hash that identifies our physical node */
    myhash = gmx_physicalnode_id_hash();

    /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
    snew(hash,      nrank_world);
    snew(hash_s,    nrank_world);
    snew(hash_pp,   nrank_world);
    snew(hash_pp_s, nrank_world);

    hash_s[rank_world]    = myhash;
    hash_pp_s[rank_world] = thisRankHasDuty(cr, DUTY_PP) ? myhash : -1;

    MPI_Allreduce(hash_s,    hash,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(hash_pp_s, hash_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    nrank_intranode    = 0;
    rank_intranode     = 0;
    nrank_pp_intranode = 0;
    rank_pp_intranode  = 0;
    for (i = 0; i < nrank_world; i++)
    {
        if (hash[i] == myhash)
        {
            nrank_intranode++;
            if (i < rank_world)
            {
                rank_intranode++;
            }
        }
        if (hash_pp[i] == myhash)
        {
            nrank_pp_intranode++;
            if (thisRankHasDuty(cr, DUTY_PP) && i < rank_world)
            {
                rank_pp_intranode++;
            }
        }
    }
    sfree(hash);
    sfree(hash_s);
    sfree(hash_pp);
    sfree(hash_pp_s);
#else
    /* Serial or thread-MPI code: we run within a single physical node */
    nrank_intranode    = cr->nnodes;
    rank_intranode     = cr->sim_nodeid;
    nrank_pp_intranode = cr->nnodes - cr->npmenodes;
    rank_pp_intranode  = cr->nodeid;
#endif

    if (debug)
    {
        char sbuf[STRLEN];
        if (thisRankHasDuty(cr, DUTY_PP) && thisRankHasDuty(cr, DUTY_PME))
        {
            sprintf(sbuf, "PP+PME");
        }
        else
        {
            sprintf(sbuf, "%s", thisRankHasDuty(cr, DUTY_PP) ? "PP" : "PME");
        }
        fprintf(debug, "On %3s rank %d: nrank_intranode=%d, rank_intranode=%d, "
                "nrank_pp_intranode=%d, rank_pp_intranode=%d\n",
                sbuf, cr->sim_nodeid,
                nrank_intranode, rank_intranode,
                nrank_pp_intranode, rank_pp_intranode);
    }

    cr->nrank_intranode    = nrank_intranode;
    cr->rank_intranode     = rank_intranode;
    cr->nrank_pp_intranode = nrank_pp_intranode;
    cr->rank_pp_intranode  = rank_pp_intranode;
}

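/* gmx_barrier blocks until all ranks of cr->mpi_comm_mygroup have reached
 * it; gmx_barrier_physical_node does the same for the ranks sharing this
 * physical node. In builds without MPI, calling either is reported as a
 * fatal error via gmx_call(). */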
void gmx_barrier(const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_barrier");
#else
    MPI_Barrier(cr->mpi_comm_mygroup);
#endif
}

void gmx_barrier_physical_node(const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_barrier_physical_node");
#else
    MPI_Barrier(cr->mpi_comm_physicalnode);
#endif
}

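/* gmx_bcast broadcasts nbytes of data from the master rank to all ranks
 * of cr->mpi_comm_mygroup; gmx_bcast_sim does the same over
 * cr->mpi_comm_mysim, i.e. all ranks of this simulation. */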
void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_bcast");
#else
    MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
#endif
}

void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_bcast_sim");
#else
    MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
#endif
}

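/* gmx_sumd/gmx_sumf/gmx_sumi/gmx_sumli sum an array of doubles, floats,
 * ints or 64-bit ints in place over all ranks of cr->mpi_comm_mygroup.
 * When the two-step node communicators set up in gmx_setup_nodecomm()
 * are in use (cr->nc.bUse), the reduction is done first within each
 * physical node and then across nodes, followed by an intra-node
 * broadcast. Without MPI_IN_PLACE support, the temporary buffers in
 * cr->mpb are used instead.
 *
 * A minimal usage sketch (the variable names are illustrative only):
 *   double ener[2] = { epotLocal, ekinLocal };
 *   gmx_sumd(2, ener, cr);   // on return every rank holds the global sums
 */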
void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_sumd");
#else
#if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        if (cr->nc.rank_intra == 0)
        {
            /* Use two step summing. */
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
                       cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers. */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
                          cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
                      cr->mpi_comm_mygroup);
    }
#else
    int i;

    if (nr > cr->mpb->dbuf_alloc)
    {
        cr->mpb->dbuf_alloc = nr;
        srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
                          cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
                      cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->dbuf[i];
        }
    }
#endif
#endif
}

void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_sumf");
#else
#if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing. */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
                       cr->nc.comm_intra);
            /* Sum the roots of the internal (intra) buffers */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
                          cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#else
    int i;

    if (nr > cr->mpb->fbuf_alloc)
    {
        cr->mpb->fbuf_alloc = nr;
        srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
                          cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
                      cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->fbuf[i];
        }
    }
#endif
#endif
}

void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_sumi");
#else
#if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
    }
#else
    int i;

    if (nr > cr->mpb->ibuf_alloc)
    {
        cr->mpb->ibuf_alloc = nr;
        srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->ibuf[i];
        }
    }
#endif
#endif
}

void gmx_sumli(int gmx_unused nr, gmx_int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
    gmx_call("gmx_sumli");
#else
#if MPI_IN_PLACE_EXISTS
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        if (cr->nc.rank_intra == 0)
        {
            MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
                       cr->nc.comm_intra);
            /* Sum with the buffers reversed */
            MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
                          cr->nc.comm_inter);
        }
        else
        {
            /* This is here because of the silly MPI specification
                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
            MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
    }
#else
    int i;

    if (nr > cr->mpb->libuf_alloc)
    {
        cr->mpb->libuf_alloc = nr;
        srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
    }
    if (cr->nc.bUse)
    {
        /* Use two step summing */
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
                      cr->nc.comm_intra);
        if (cr->nc.rank_intra == 0)
        {
            /* Sum with the buffers reversed */
            MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
                          cr->nc.comm_inter);
        }
        MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
    }
    else
    {
        MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
                      cr->mpi_comm_mygroup);
        for (i = 0; i < nr; i++)
        {
            r[i] = cr->mpb->libuf[i];
        }
    }
#endif
#endif
}

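/* The static gmx_sum{d,f}_comm helpers below reduce in place over an
 * explicit communicator; they back gmx_sumd_sim and gmx_sumf_sim, which
 * (like gmx_sumi_sim and gmx_sumli_sim) sum over the master ranks of a
 * multi-simulation (ms->mpi_comm_masters). */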
#if GMX_MPI
static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
{
#if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
#else
    /* this function is only used in code that is not performance critical,
       (during setup, when comm_rec is not the appropriate communication
       structure), so this isn't as bad as it looks. */
    double *buf;
    int     i;

    snew(buf, nr);
    MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
    for (i = 0; i < nr; i++)
    {
        r[i] = buf[i];
    }
    sfree(buf);
#endif
}
#endif

#if GMX_MPI
static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
{
#if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
#else
    /* this function is only used in code that is not performance critical,
       (during setup, when comm_rec is not the appropriate communication
       structure), so this isn't as bad as it looks. */
    float *buf;
    int    i;

    snew(buf, nr);
    MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
    for (i = 0; i < nr; i++)
    {
        r[i] = buf[i];
    }
    sfree(buf);
#endif
}
#endif

void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
{
#if !GMX_MPI
    gmx_call("gmx_sumd_sim");
#else
    gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
#endif
}

void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
{
#if !GMX_MPI
    gmx_call("gmx_sumf_sim");
#else
    gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
#endif
}

void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
{
#if !GMX_MPI
    gmx_call("gmx_sumi_sim");
#else
#if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
#else
    /* this is thread-unsafe, but it will do for now: */
    int i;

    if (nr > ms->mpb->ibuf_alloc)
    {
        ms->mpb->ibuf_alloc = nr;
        srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
    }
    MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
    for (i = 0; i < nr; i++)
    {
        r[i] = ms->mpb->ibuf[i];
    }
#endif
#endif
}

void gmx_sumli_sim(int gmx_unused nr, gmx_int64_t gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
{
#if !GMX_MPI
    gmx_call("gmx_sumli_sim");
#else
#if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
                  ms->mpi_comm_masters);
#else
    /* this is thread-unsafe, but it will do for now: */
    int i;

    if (nr > ms->mpb->libuf_alloc)
    {
        ms->mpb->libuf_alloc = nr;
        srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
    }
    MPI_Allreduce(r, ms->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
                  ms->mpi_comm_masters);
    for (i = 0; i < nr; i++)
    {
        r[i] = ms->mpb->libuf[i];
    }
#endif
#endif
}

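/* Return the file name for option opt on the simulation master rank, and
 * nullptr on all other ranks. */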
const char *opt2fn_master(const char *opt, int nfile, const t_filenm fnm[],
                          t_commrec *cr)
{
    return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
}

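/* Report a fatal error from a collective context. When the ranks of comm
 * cover all of MPI_COMM_WORLD, the error handling is told it may finalize
 * MPI rather than abort the job outright. */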
void gmx_fatal_collective(int f_errno, const char *file, int line,
                          MPI_Comm comm, gmx_bool bMaster,
                          const char *fmt, ...)
{
    va_list  ap;
    gmx_bool bFinalize;
#if GMX_MPI
    int      result;
    /* Check if we are calling on all processes in MPI_COMM_WORLD */
    MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
    /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
    bFinalize = (result != MPI_UNEQUAL);
#else
    GMX_UNUSED_VALUE(comm);
    bFinalize = TRUE;
#endif

    va_start(ap, fmt);
    gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
    va_end(ap);
}