Use gmx::Range in domdec
[gromacs.git] / src / gromacs / domdec / domdec.cpp
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include "domdec.h"
40 #include "config.h"
42 #include <cassert>
43 #include <cinttypes>
44 #include <climits>
45 #include <cmath>
46 #include <cstdio>
47 #include <cstdlib>
48 #include <cstring>
50 #include <algorithm>
51 #include <memory>
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
103 #include "atomdistribution.h"
104 #include "box.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
112 #include "utility.h"
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
122 #define DD_CGIBS 2
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_FLAG_NRCG 65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
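/* Editor's illustration (not part of the original source): with these macros a
 * move-flag word packs the atom-group size in the low 16 bits (masked by
 * DD_FLAG_NRCG) and per-dimension direction bits above them. A hypothetical
 * flag for a 3-atom group moving forward along decomposition dimension 1
 * could be built as:
 *
 *   int flag = (3 & DD_FLAG_NRCG) | DD_FLAG_FW(1);   // 3 | (1 << 18)
 */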
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
133 /* The non-bonded zone-pair setup for domain decomposition
134 * The first number is the i-zone, the second number the first j-zone seen by
135 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136 * As is, this is for 3D decomposition, where there are 4 i-zones.
137 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
140 static const int
141 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
142 {1, 3, 6},
143 {2, 5, 6},
144 {3, 5, 7}};
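/* Worked example (editor's illustration, not in the original source): the row
 * {1, 3, 6} means i-zone 1 sees j-zones 3, 4 and 5 in a 3D decomposition.
 * With a 2D decomposition the last+1 j-zone is clipped to 4, so i-zone 1 then
 * only sees j-zone 3.
 */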
148 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
150 static void index2xyz(ivec nc,int ind,ivec xyz)
152 xyz[XX] = ind % nc[XX];
153 xyz[YY] = (ind / nc[XX]) % nc[YY];
154 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162 xyz[ZZ] = ind % nc[ZZ];
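/* Editor's note (illustrative, not in the original source): index2xyz()
 * inverts the dd_index() macro above, with x varying fastest, while
 * ddindex2xyz() decodes an ordering in which z varies fastest. For example,
 * with nc = {2, 2, 2}, index2xyz(nc, 1, xyz) gives {1, 0, 0}, whereas
 * ddindex2xyz(nc, 1, xyz) gives {0, 0, 1}.
 */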
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
167 int ddnodeid = -1;
169 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170 const int ddindex = dd_index(dd->nc, c);
171 if (cartSetup.bCartesianPP_PME)
173 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175 else if (cartSetup.bCartesianPP)
177 #if GMX_MPI
178 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
179 #endif
181 else
183 ddnodeid = ddindex;
186 return ddnodeid;
189 int ddglatnr(const gmx_domdec_t *dd, int i)
191 int atnr;
193 if (dd == nullptr)
195 atnr = i + 1;
197 else
199 if (i >= dd->comm->atomRanges.numAtomsTotal())
201 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
203 atnr = dd->globalAtomIndices[i] + 1;
206 return atnr;
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
211 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
217 int i;
219 if (state->ddp_count != dd->ddp_count)
221 gmx_incons("The MD state does not match the domain decomposition state");
224 state->cg_gl.resize(dd->ncg_home);
225 for (i = 0; i < dd->ncg_home; i++)
227 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
230 state->ddp_count_cg_gl = dd->ddp_count;
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
235 return &dd->comm->zones;
238 int dd_numAtomsZones(const gmx_domdec_t &dd)
240 return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
243 int dd_numHomeAtoms(const gmx_domdec_t &dd)
245 return dd.comm->atomRanges.numHomeAtoms();
248 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
250 /* We currently set mdatoms entries for all atoms:
251 * local + non-local + communicated for vsite + constraints
254 return dd->comm->atomRanges.numAtomsTotal();
257 int dd_natoms_vsite(const gmx_domdec_t *dd)
259 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
262 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
264 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
265 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
268 void dd_move_x(gmx_domdec_t *dd,
269 const matrix box,
270 gmx::ArrayRef<gmx::RVec> x,
271 gmx_wallcycle *wcycle)
273 wallcycle_start(wcycle, ewcMOVEX);
275 int nzone, nat_tot;
276 gmx_domdec_comm_t *comm;
277 gmx_domdec_comm_dim_t *cd;
278 rvec shift = {0, 0, 0};
279 gmx_bool bPBC, bScrew;
281 comm = dd->comm;
283 nzone = 1;
284 nat_tot = comm->atomRanges.numHomeAtoms();
285 for (int d = 0; d < dd->ndim; d++)
287 bPBC = (dd->ci[dd->dim[d]] == 0);
288 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
289 if (bPBC)
291 copy_rvec(box[dd->dim[d]], shift);
293 cd = &comm->cd[d];
294 for (const gmx_domdec_ind_t &ind : cd->ind)
296 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
297 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
298 int n = 0;
299 if (!bPBC)
301 for (int j : ind.index)
303 sendBuffer[n] = x[j];
304 n++;
307 else if (!bScrew)
309 for (int j : ind.index)
311 /* We need to shift the coordinates */
312 for (int d = 0; d < DIM; d++)
314 sendBuffer[n][d] = x[j][d] + shift[d];
316 n++;
319 else
321 for (int j : ind.index)
323 /* Shift x */
324 sendBuffer[n][XX] = x[j][XX] + shift[XX];
325 /* Rotate y and z.
326 * This operation requires a special shift force
327 * treatment, which is performed in calc_vir.
329 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
330 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
331 n++;
335 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
337 gmx::ArrayRef<gmx::RVec> receiveBuffer;
338 if (cd->receiveInPlace)
340 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
342 else
344 receiveBuffer = receiveBufferAccess.buffer;
346 /* Send and receive the coordinates */
347 ddSendrecv(dd, d, dddirBackward,
348 sendBuffer, receiveBuffer);
350 if (!cd->receiveInPlace)
352 int j = 0;
353 for (int zone = 0; zone < nzone; zone++)
355 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
357 x[i] = receiveBuffer[j++];
361 nat_tot += ind.nrecv[nzone+1];
363 nzone += nzone;
366 wallcycle_stop(wcycle, ewcMOVEX);
369 void dd_move_f(gmx_domdec_t *dd,
370 gmx::ForceWithShiftForces *forceWithShiftForces,
371 gmx_wallcycle *wcycle)
373 wallcycle_start(wcycle, ewcMOVEF);
375 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
376 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
378 gmx_domdec_comm_t &comm = *dd->comm;
379 int nzone = comm.zones.n/2;
380 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
381 for (int d = dd->ndim-1; d >= 0; d--)
383 /* Only forces in domains near the PBC boundaries need to
384 consider PBC in the treatment of fshift */
385 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
386 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
387 /* Determine which shift vector we need */
388 ivec vis = { 0, 0, 0 };
389 vis[dd->dim[d]] = 1;
390 const int is = IVEC2IS(vis);
392 /* Loop over the pulses */
393 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
394 for (int p = cd.numPulses() - 1; p >= 0; p--)
396 const gmx_domdec_ind_t &ind = cd.ind[p];
397 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
398 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
400 nat_tot -= ind.nrecv[nzone+1];
402 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
404 gmx::ArrayRef<gmx::RVec> sendBuffer;
405 if (cd.receiveInPlace)
407 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
409 else
411 sendBuffer = sendBufferAccess.buffer;
412 int j = 0;
413 for (int zone = 0; zone < nzone; zone++)
415 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
417 sendBuffer[j++] = f[i];
421 /* Communicate the forces */
422 ddSendrecv(dd, d, dddirForward,
423 sendBuffer, receiveBuffer);
424 /* Add the received forces */
425 int n = 0;
426 if (!shiftForcesNeedPbc)
428 for (int j : ind.index)
430 for (int d = 0; d < DIM; d++)
432 f[j][d] += receiveBuffer[n][d];
434 n++;
437 else if (!applyScrewPbc)
439 for (int j : ind.index)
441 for (int d = 0; d < DIM; d++)
443 f[j][d] += receiveBuffer[n][d];
445 /* Add this force to the shift force */
446 for (int d = 0; d < DIM; d++)
448 fshift[is][d] += receiveBuffer[n][d];
450 n++;
453 else
455 for (int j : ind.index)
457 /* Rotate the force */
458 f[j][XX] += receiveBuffer[n][XX];
459 f[j][YY] -= receiveBuffer[n][YY];
460 f[j][ZZ] -= receiveBuffer[n][ZZ];
461 if (shiftForcesNeedPbc)
463 /* Add this force to the shift force */
464 for (int d = 0; d < DIM; d++)
466 fshift[is][d] += receiveBuffer[n][d];
469 n++;
473 nzone /= 2;
475 wallcycle_stop(wcycle, ewcMOVEF);
478 /* Convenience function for extracting a real buffer from an rvec buffer
480 * To reduce the number of temporary communication buffers and avoid
481 * cache pollution, we reuse gmx::RVec buffers for storing reals.
482 * This function returns a real buffer reference with the same number
483 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
485 static gmx::ArrayRef<real>
486 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
488 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
489 arrayRef.size());
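/* Usage sketch (editor's addition, not in the original source): for a buffer
 * of N RVec elements the returned ArrayRef<real> also has N elements, i.e. it
 * aliases only the first third of the underlying reals, which matches the
 * over-provisioning noted in the callers below.
 */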
492 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
494 int nzone, nat_tot;
495 gmx_domdec_comm_t *comm;
496 gmx_domdec_comm_dim_t *cd;
498 comm = dd->comm;
500 nzone = 1;
501 nat_tot = comm->atomRanges.numHomeAtoms();
502 for (int d = 0; d < dd->ndim; d++)
504 cd = &comm->cd[d];
505 for (const gmx_domdec_ind_t &ind : cd->ind)
507 /* Note: We provision for RVec instead of real, so a factor of 3
508 * more than needed. The buffer actually already has this size
509 * and we pass a plain pointer below, so this does not matter.
511 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
512 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
513 int n = 0;
514 for (int j : ind.index)
516 sendBuffer[n++] = v[j];
519 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
521 gmx::ArrayRef<real> receiveBuffer;
522 if (cd->receiveInPlace)
524 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
526 else
528 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
530 /* Send and receive the data */
531 ddSendrecv(dd, d, dddirBackward,
532 sendBuffer, receiveBuffer);
533 if (!cd->receiveInPlace)
535 int j = 0;
536 for (int zone = 0; zone < nzone; zone++)
538 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
540 v[i] = receiveBuffer[j++];
544 nat_tot += ind.nrecv[nzone+1];
546 nzone += nzone;
550 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
552 int nzone, nat_tot;
553 gmx_domdec_comm_t *comm;
554 gmx_domdec_comm_dim_t *cd;
556 comm = dd->comm;
558 nzone = comm->zones.n/2;
559 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
560 for (int d = dd->ndim-1; d >= 0; d--)
562 cd = &comm->cd[d];
563 for (int p = cd->numPulses() - 1; p >= 0; p--)
565 const gmx_domdec_ind_t &ind = cd->ind[p];
567 /* Note: We provision for RVec instead of real, so a factor of 3
568 * more than needed. The buffer actually already has this size
569 * and we typecast, so this works as intended.
571 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
572 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
573 nat_tot -= ind.nrecv[nzone + 1];
575 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
577 gmx::ArrayRef<real> sendBuffer;
578 if (cd->receiveInPlace)
580 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
582 else
584 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
585 int j = 0;
586 for (int zone = 0; zone < nzone; zone++)
588 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
590 sendBuffer[j++] = v[i];
594 /* Communicate the forces */
595 ddSendrecv(dd, d, dddirForward,
596 sendBuffer, receiveBuffer);
597 /* Add the received forces */
598 int n = 0;
599 for (int j : ind.index)
601 v[j] += receiveBuffer[n];
602 n++;
605 nzone /= 2;
609 real dd_cutoff_multibody(const gmx_domdec_t *dd)
611 const gmx_domdec_comm_t &comm = *dd->comm;
612 const DDSystemInfo &systemInfo = comm.systemInfo;
614 real r = -1;
615 if (systemInfo.haveInterDomainMultiBodyBondeds)
617 if (comm.cutoff_mbody > 0)
619 r = comm.cutoff_mbody;
621 else
623 /* cutoff_mbody=0 means we do not have DLB */
624 r = comm.cellsize_min[dd->dim[0]];
625 for (int di = 1; di < dd->ndim; di++)
627 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
629 if (comm.systemInfo.filterBondedCommunication)
631 r = std::max(r, comm.cutoff_mbody);
633 else
635 r = std::min(r, systemInfo.cutoff);
640 return r;
643 real dd_cutoff_twobody(const gmx_domdec_t *dd)
645 real r_mb;
647 r_mb = dd_cutoff_multibody(dd);
649 return std::max(dd->comm->systemInfo.cutoff, r_mb);
653 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
654 const CartesianRankSetup &cartSetup,
655 const ivec coord,
656 ivec coord_pme)
658 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
659 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
660 copy_ivec(coord, coord_pme);
661 coord_pme[cartSetup.cartpmedim] =
662 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
665 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
666 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
667 const int ddCellIndex)
669 const int npp = ddRankSetup.numPPRanks;
670 const int npme = ddRankSetup.numRanksDoingPme;
672 /* Here we assign a PME node to communicate with this DD node
673 * by assuming that the major index of both is x.
674 * We add npme/2 to obtain an even distribution.
676 return (ddCellIndex*npme + npme/2)/npp;
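/* Worked example (editor's addition, not in the original source): with
 * npp = 6 PP ranks and npme = 2 PME ranks, (ddCellIndex*npme + npme/2)/npp
 * maps DD cell indices 0, 1, 2 to PME index 0 and 3, 4, 5 to PME index 1.
 */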
679 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
681 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
683 int n = 0;
684 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
686 const int p0 = ddindex2pmeindex(ddRankSetup, i);
687 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
688 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
690 if (debug)
692 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
694 pmeRanks[n] = i + 1 + n;
695 n++;
699 return pmeRanks;
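/* Editor's illustration (not in the original source), continuing the example
 * above: with 6 PP ranks and 2 PME ranks the interleaved list becomes
 * pmeRanks = {3, 7}, i.e. simulation ranks 0-2 and 4-6 do PP work while ranks
 * 3 and 7 are the PME-only ranks.
 */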
702 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
704 gmx_domdec_t *dd;
705 ivec coords;
706 int slab;
708 dd = cr->dd;
709 /*
710 if (dd->comm->bCartesian) {
711 gmx_ddindex2xyz(dd->nc,ddindex,coords);
712 dd_coords2pmecoords(dd,coords,coords_pme);
713 copy_ivec(dd->ntot,nc);
714 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
715 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
717 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
718 } else {
719 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
720 }
721 */
722 coords[XX] = x;
723 coords[YY] = y;
724 coords[ZZ] = z;
725 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
727 return slab;
730 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
732 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
733 ivec coords = { x, y, z };
734 int nodeid = -1;
736 if (cartSetup.bCartesianPP_PME)
738 #if GMX_MPI
739 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
740 #endif
742 else
744 int ddindex = dd_index(cr->dd->nc, coords);
745 if (cartSetup.bCartesianPP)
747 nodeid = cartSetup.ddindex2simnodeid[ddindex];
749 else
751 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
753 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
755 else
757 nodeid = ddindex;
762 return nodeid;
765 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
766 const CartesianRankSetup &cartSetup,
767 gmx::ArrayRef<const int> pmeRanks,
768 const t_commrec gmx_unused *cr,
769 const int sim_nodeid)
771 int pmenode = -1;
773 /* This assumes a uniform x domain decomposition grid cell size */
774 if (cartSetup.bCartesianPP_PME)
776 #if GMX_MPI
777 ivec coord, coord_pme;
778 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
779 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
781 /* This is a PP rank */
782 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
783 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
785 #endif
787 else if (cartSetup.bCartesianPP)
789 if (sim_nodeid < ddRankSetup.numPPRanks)
791 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
794 else
796 /* This assumes DD cells with identical x coordinates
797 * are numbered sequentially.
799 if (pmeRanks.empty())
801 if (sim_nodeid < ddRankSetup.numPPRanks)
803 /* The DD index equals the nodeid */
804 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
807 else
809 int i = 0;
810 while (sim_nodeid > pmeRanks[i])
812 i++;
814 if (sim_nodeid < pmeRanks[i])
816 pmenode = pmeRanks[i];
821 return pmenode;
824 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
826 if (dd != nullptr)
828 return {
829 dd->comm->ddRankSetup.npmenodes_x,
830 dd->comm->ddRankSetup.npmenodes_y
833 else
835 return {
836 1, 1
841 std::vector<int> get_pme_ddranks(const t_commrec *cr,
842 const int pmenodeid)
844 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
845 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
846 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
847 std::vector<int> ddranks;
848 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
850 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
852 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
854 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
856 if (cartSetup.bCartesianPP_PME)
858 ivec coord = { x, y, z };
859 ivec coord_pme;
860 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
861 if (cr->dd->ci[XX] == coord_pme[XX] &&
862 cr->dd->ci[YY] == coord_pme[YY] &&
863 cr->dd->ci[ZZ] == coord_pme[ZZ])
865 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
868 else
870 /* The slab corresponds to the nodeid in the PME group */
871 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
873 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
879 return ddranks;
882 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
883 gmx::ArrayRef<const int> pmeRanks,
884 const t_commrec *cr)
886 gmx_bool bReceive = TRUE;
888 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
889 if (ddRankSetup.usePmeOnlyRanks)
891 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
892 if (cartSetup.bCartesianPP_PME)
894 #if GMX_MPI
895 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
896 ivec coords;
897 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
898 coords[cartSetup.cartpmedim]++;
899 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
901 int rank;
902 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
903 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
905 /* This is not the last PP node for pmenode */
906 bReceive = FALSE;
909 #else
910 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
911 #endif
913 else
915 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
916 if (cr->sim_nodeid+1 < cr->nnodes &&
917 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
919 /* This is not the last PP node for pmenode */
920 bReceive = FALSE;
925 return bReceive;
928 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
930 gmx_domdec_comm_t *comm;
931 int i;
933 comm = dd->comm;
935 snew(*dim_f, dd->nc[dim]+1);
936 (*dim_f)[0] = 0;
937 for (i = 1; i < dd->nc[dim]; i++)
939 if (comm->slb_frac[dim])
941 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
943 else
945 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
948 (*dim_f)[dd->nc[dim]] = 1;
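/* Worked example (editor's addition, not in the original source): without a
 * user-supplied slb_frac and dd->nc[dim] == 4, this yields the cumulative
 * fractions dim_f = {0, 0.25, 0.5, 0.75, 1}.
 */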
951 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
953 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
955 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
957 ddpme->dim = YY;
959 else
961 ddpme->dim = dimind;
963 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
965 ddpme->nslab = (ddpme->dim == 0 ?
966 ddRankSetup.npmenodes_x :
967 ddRankSetup.npmenodes_y);
969 if (ddpme->nslab <= 1)
971 return;
974 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
975 /* Determine for each PME slab the PP location range for dimension dim */
976 snew(ddpme->pp_min, ddpme->nslab);
977 snew(ddpme->pp_max, ddpme->nslab);
978 for (int slab = 0; slab < ddpme->nslab; slab++)
980 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
981 ddpme->pp_max[slab] = 0;
983 for (int i = 0; i < dd->nnodes; i++)
985 ivec xyz;
986 ddindex2xyz(dd->nc, i, xyz);
987 /* For y only use our y/z slab.
988 * This assumes that the PME x grid size matches the DD grid size.
990 if (dimind == 0 || xyz[XX] == dd->ci[XX])
992 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
993 int slab;
994 if (dimind == 0)
996 slab = pmeindex/nso;
998 else
1000 slab = pmeindex % ddpme->nslab;
1002 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1003 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1007 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1010 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1012 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1014 if (ddRankSetup.ddpme[0].dim == XX)
1016 return ddRankSetup.ddpme[0].maxshift;
1018 else
1020 return 0;
1024 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1026 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1028 if (ddRankSetup.ddpme[0].dim == YY)
1030 return ddRankSetup.ddpme[0].maxshift;
1032 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1034 return ddRankSetup.ddpme[1].maxshift;
1036 else
1038 return 0;
1042 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1044 return dd.comm->systemInfo.haveSplitConstraints;
1047 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1049 /* Note that the cycles value can be incorrect, either 0 or some
1050 * extremely large value, when our thread migrated to another core
1051 * with an unsynchronized cycle counter. If this happens less often
1052 * than once per nstlist steps, this will not cause issues, since
1053 * we later subtract the maximum value from the sum over nstlist steps.
1054 * A zero count will slightly lower the total, but that's a small effect.
1055 * Note that the main purpose of the subtraction of the maximum value
1056 * is to avoid throwing off the load balancing when stalls occur due
1057 * e.g. system activity or network congestion.
1059 dd->comm->cycl[ddCycl] += cycles;
1060 dd->comm->cycl_n[ddCycl]++;
1061 if (cycles > dd->comm->cycl_max[ddCycl])
1063 dd->comm->cycl_max[ddCycl] = cycles;
1067 #if GMX_MPI
1068 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1070 MPI_Comm c_row;
1071 int dim, i, rank;
1072 ivec loc_c;
1073 gmx_bool bPartOfGroup = FALSE;
1075 dim = dd->dim[dim_ind];
1076 copy_ivec(loc, loc_c);
1077 for (i = 0; i < dd->nc[dim]; i++)
1079 loc_c[dim] = i;
1080 rank = dd_index(dd->nc, loc_c);
1081 if (rank == dd->rank)
1083 /* This process is part of the group */
1084 bPartOfGroup = TRUE;
1087 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1088 &c_row);
1089 if (bPartOfGroup)
1091 dd->comm->mpi_comm_load[dim_ind] = c_row;
1092 if (!isDlbDisabled(dd->comm))
1094 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1096 if (dd->ci[dim] == dd->master_ci[dim])
1098 /* This is the root process of this row */
1099 cellsizes.rowMaster = std::make_unique<RowMaster>();
1101 RowMaster &rowMaster = *cellsizes.rowMaster;
1102 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1103 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1104 rowMaster.isCellMin.resize(dd->nc[dim]);
1105 if (dim_ind > 0)
1107 rowMaster.bounds.resize(dd->nc[dim]);
1109 rowMaster.buf_ncd.resize(dd->nc[dim]);
1111 else
1113 /* This is not a root process, we only need to receive cell_f */
1114 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1117 if (dd->ci[dim] == dd->master_ci[dim])
1119 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1123 #endif
1125 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1126 int gpu_id)
1128 #if GMX_MPI
1129 int physicalnode_id_hash;
1130 gmx_domdec_t *dd;
1131 MPI_Comm mpi_comm_pp_physicalnode;
1133 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1135 /* Only ranks with short-ranged tasks (currently) use GPUs.
1136 * If we don't have GPUs assigned, there are no resources to share.
1138 return;
1141 physicalnode_id_hash = gmx_physicalnode_id_hash();
1143 dd = cr->dd;
1145 if (debug)
1147 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1148 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1149 dd->rank, physicalnode_id_hash, gpu_id);
1151 /* Split the PP communicator over the physical nodes */
1152 /* TODO: See if we should store this (before), as it's also used for
1153 * the nodecomm summation.
1155 // TODO PhysicalNodeCommunicator could be extended/used to handle
1156 // the need for per-node per-group communicators.
1157 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1158 &mpi_comm_pp_physicalnode);
1159 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1160 &dd->comm->mpi_comm_gpu_shared);
1161 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1162 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1164 if (debug)
1166 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1169 /* Note that some ranks could share a GPU, while others don't */
1171 if (dd->comm->nrank_gpu_shared == 1)
1173 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1175 #else
1176 GMX_UNUSED_VALUE(cr);
1177 GMX_UNUSED_VALUE(gpu_id);
1178 #endif
1181 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1183 #if GMX_MPI
1184 int dim0, dim1, i, j;
1185 ivec loc;
1187 if (debug)
1189 fprintf(debug, "Making load communicators\n");
1192 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1193 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1195 if (dd->ndim == 0)
1197 return;
1200 clear_ivec(loc);
1201 make_load_communicator(dd, 0, loc);
1202 if (dd->ndim > 1)
1204 dim0 = dd->dim[0];
1205 for (i = 0; i < dd->nc[dim0]; i++)
1207 loc[dim0] = i;
1208 make_load_communicator(dd, 1, loc);
1211 if (dd->ndim > 2)
1213 dim0 = dd->dim[0];
1214 for (i = 0; i < dd->nc[dim0]; i++)
1216 loc[dim0] = i;
1217 dim1 = dd->dim[1];
1218 for (j = 0; j < dd->nc[dim1]; j++)
1220 loc[dim1] = j;
1221 make_load_communicator(dd, 2, loc);
1226 if (debug)
1228 fprintf(debug, "Finished making load communicators\n");
1230 #endif
1233 /*! \brief Sets up the relation between neighboring domains and zones */
1234 static void setup_neighbor_relations(gmx_domdec_t *dd)
1236 int d, dim, m;
1237 ivec tmp, s;
1238 gmx_domdec_zones_t *zones;
1239 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1241 for (d = 0; d < dd->ndim; d++)
1243 dim = dd->dim[d];
1244 copy_ivec(dd->ci, tmp);
1245 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1246 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1247 copy_ivec(dd->ci, tmp);
1248 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1249 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1250 if (debug)
1252 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1253 dd->rank, dim,
1254 dd->neighbor[d][0],
1255 dd->neighbor[d][1]);
1259 int nzone = (1 << dd->ndim);
1260 int nizone = (1 << std::max(dd->ndim - 1, 0));
1261 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1263 zones = &dd->comm->zones;
1265 for (int i = 0; i < nzone; i++)
1267 m = 0;
1268 clear_ivec(zones->shift[i]);
1269 for (d = 0; d < dd->ndim; d++)
1271 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1275 zones->n = nzone;
1276 for (int i = 0; i < nzone; i++)
1278 for (d = 0; d < DIM; d++)
1280 s[d] = dd->ci[d] - zones->shift[i][d];
1281 if (s[d] < 0)
1283 s[d] += dd->nc[d];
1285 else if (s[d] >= dd->nc[d])
1287 s[d] -= dd->nc[d];
1291 for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1293 GMX_RELEASE_ASSERT(ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1294 "The first element for each ddNonbondedZonePairRanges should match its index");
1296 DDPairInteractionRanges iZone;
1297 iZone.iZoneIndex = iZoneIndex;
1298 /* ddNonbondedZonePairRanges is for 3D decomposition, for fewer dimensions use only
1299 * j-zones up to nzone.
1301 iZone.jZoneRange =
1302 gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1303 std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1304 for (dim = 0; dim < DIM; dim++)
1306 if (dd->nc[dim] == 1)
1308 /* All shifts should be allowed */
1309 iZone.shift0[dim] = -1;
1310 iZone.shift1[dim] = 1;
1312 else
1314 /* Determine the min/max j-zone shift wrt the i-zone */
1315 iZone.shift0[dim] = 1;
1316 iZone.shift1[dim] = -1;
1317 for (int jZone : iZone.jZoneRange)
1319 int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1320 if (shift_diff < iZone.shift0[dim])
1322 iZone.shift0[dim] = shift_diff;
1324 if (shift_diff > iZone.shift1[dim])
1326 iZone.shift1[dim] = shift_diff;
1332 zones->iZones.push_back(iZone);
1335 if (!isDlbDisabled(dd->comm))
1337 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1340 if (dd->comm->ddSettings.recordLoad)
1342 make_load_communicators(dd);
1346 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1347 gmx_domdec_t *dd,
1348 t_commrec gmx_unused *cr,
1349 bool gmx_unused reorder)
1351 #if GMX_MPI
1352 gmx_domdec_comm_t *comm = dd->comm;
1353 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1355 if (cartSetup.bCartesianPP)
1357 /* Set up cartesian communication for the particle-particle part */
1358 GMX_LOG(mdlog.info).appendTextFormatted(
1359 "Will use a Cartesian communicator: %d x %d x %d",
1360 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1362 ivec periods;
1363 for (int i = 0; i < DIM; i++)
1365 periods[i] = TRUE;
1367 MPI_Comm comm_cart;
1368 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1369 &comm_cart);
1370 /* We overwrite the old communicator with the new cartesian one */
1371 cr->mpi_comm_mygroup = comm_cart;
1374 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1375 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1377 if (cartSetup.bCartesianPP_PME)
1379 /* Since we want to use the original cartesian setup for sim,
1380 * and not the one after split, we need to make an index.
1382 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1383 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1384 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1385 /* Get the rank of the DD master,
1386 * above we made sure that the master node is a PP node.
1388 int rank;
1389 if (MASTER(cr))
1391 rank = dd->rank;
1393 else
1395 rank = 0;
1397 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1399 else if (cartSetup.bCartesianPP)
1401 if (!comm->ddRankSetup.usePmeOnlyRanks)
1403 /* The PP communicator is also
1404 * the communicator for this simulation
1406 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1408 cr->nodeid = dd->rank;
1410 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1412 /* We need to make an index to go from the coordinates
1413 * to the nodeid of this simulation.
1415 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1416 std::vector<int> buf(dd->nnodes);
1417 if (thisRankHasDuty(cr, DUTY_PP))
1419 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1421 /* Communicate the ddindex to simulation nodeid index */
1422 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1423 cr->mpi_comm_mysim);
1425 /* Determine the master coordinates and rank.
1426 * The DD master should be the same node as the master of this sim.
1428 for (int i = 0; i < dd->nnodes; i++)
1430 if (cartSetup.ddindex2simnodeid[i] == 0)
1432 ddindex2xyz(dd->nc, i, dd->master_ci);
1433 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1436 if (debug)
1438 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1441 else
1443 /* No Cartesian communicators */
1444 /* We use the rank in dd->comm->all as DD index */
1445 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1446 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1447 dd->masterrank = 0;
1448 clear_ivec(dd->master_ci);
1450 #endif
1452 GMX_LOG(mdlog.info).appendTextFormatted(
1453 "Domain decomposition rank %d, coordinates %d %d %d\n",
1454 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1455 if (debug)
1457 fprintf(debug,
1458 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1459 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1463 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1464 t_commrec *cr)
1466 #if GMX_MPI
1467 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1469 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1471 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1472 std::vector<int> buf(dd->nnodes);
1473 if (thisRankHasDuty(cr, DUTY_PP))
1475 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1477 /* Communicate the ddindex to simulation nodeid index */
1478 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1479 cr->mpi_comm_mysim);
1481 #else
1482 GMX_UNUSED_VALUE(dd);
1483 GMX_UNUSED_VALUE(cr);
1484 #endif
1487 static CartesianRankSetup
1488 split_communicator(const gmx::MDLogger &mdlog,
1489 t_commrec *cr,
1490 const DdRankOrder ddRankOrder,
1491 bool gmx_unused reorder,
1492 const DDRankSetup &ddRankSetup,
1493 ivec ddCellIndex,
1494 std::vector<int> *pmeRanks)
1496 CartesianRankSetup cartSetup;
1498 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1499 cartSetup.bCartesianPP_PME = false;
1501 const ivec &numDDCells = ddRankSetup.numPPCells;
1502 /* Initially we set ntot to the number of PP cells */
1503 copy_ivec(numDDCells, cartSetup.ntot);
1505 if (cartSetup.bCartesianPP)
1507 const int numDDCellsTot = ddRankSetup.numPPRanks;
1508 bool bDiv[DIM];
1509 for (int i = 1; i < DIM; i++)
1511 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1513 if (bDiv[YY] || bDiv[ZZ])
1515 cartSetup.bCartesianPP_PME = TRUE;
1516 /* If we have 2D PME decomposition, which is always in x+y,
1517 * we stack the PME only nodes in z.
1518 * Otherwise we choose the direction that provides the thinnest slab
1519 * of PME only nodes as this will have the least effect
1520 * on the PP communication.
1521 * But for the PME communication the opposite might be better.
1523 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1524 !bDiv[YY] ||
1525 numDDCells[YY] > numDDCells[ZZ]))
1527 cartSetup.cartpmedim = ZZ;
1529 else
1531 cartSetup.cartpmedim = YY;
1533 cartSetup.ntot[cartSetup.cartpmedim]
1534 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1536 else
1538 GMX_LOG(mdlog.info).appendTextFormatted(
1539 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1540 ddRankSetup.numRanksDoingPme,
1541 numDDCells[XX], numDDCells[YY],
1542 numDDCells[XX], numDDCells[ZZ]);
1543 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1547 if (cartSetup.bCartesianPP_PME)
1549 #if GMX_MPI
1550 int rank;
1551 ivec periods;
1553 GMX_LOG(mdlog.info).appendTextFormatted(
1554 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1555 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1557 for (int i = 0; i < DIM; i++)
1559 periods[i] = TRUE;
1561 MPI_Comm comm_cart;
1562 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1563 &comm_cart);
1564 MPI_Comm_rank(comm_cart, &rank);
1565 if (MASTER(cr) && rank != 0)
1567 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1570 /* With this assignment we lose the link to the original communicator
1571 * which will usually be MPI_COMM_WORLD, unless we have multisim.
1573 cr->mpi_comm_mysim = comm_cart;
1574 cr->sim_nodeid = rank;
1576 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1578 GMX_LOG(mdlog.info).appendTextFormatted(
1579 "Cartesian rank %d, coordinates %d %d %d\n",
1580 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1582 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1584 cr->duty = DUTY_PP;
1586 if (!ddRankSetup.usePmeOnlyRanks ||
1587 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1589 cr->duty = DUTY_PME;
1592 /* Split the sim communicator into PP and PME only nodes */
1593 MPI_Comm_split(cr->mpi_comm_mysim,
1594 getThisRankDuties(cr),
1595 dd_index(cartSetup.ntot, ddCellIndex),
1596 &cr->mpi_comm_mygroup);
1597 #else
1598 GMX_UNUSED_VALUE(ddCellIndex);
1599 #endif
1601 else
1603 switch (ddRankOrder)
1605 case DdRankOrder::pp_pme:
1606 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1607 break;
1608 case DdRankOrder::interleave:
1609 /* Interleave the PP-only and PME-only ranks */
1610 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1611 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1612 break;
1613 case DdRankOrder::cartesian:
1614 break;
1615 default:
1616 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1619 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1621 cr->duty = DUTY_PME;
1623 else
1625 cr->duty = DUTY_PP;
1627 #if GMX_MPI
1628 /* Split the sim communicator into PP and PME only nodes */
1629 MPI_Comm_split(cr->mpi_comm_mysim,
1630 getThisRankDuties(cr),
1631 cr->nodeid,
1632 &cr->mpi_comm_mygroup);
1633 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1634 #endif
1637 GMX_LOG(mdlog.info).appendTextFormatted(
1638 "This rank does only %s work.\n",
1639 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1641 return cartSetup;
1644 /*! \brief Makes the PP communicator and the PME communicator, when needed
1646 * Returns the Cartesian rank setup.
1647 * Sets \p cr->mpi_comm_mygroup
1648 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1649 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1651 static CartesianRankSetup
1652 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1653 const DDSettings &ddSettings,
1654 const DdRankOrder ddRankOrder,
1655 const DDRankSetup &ddRankSetup,
1656 t_commrec *cr,
1657 ivec ddCellIndex,
1658 std::vector<int> *pmeRanks)
1660 CartesianRankSetup cartSetup;
1662 if (ddRankSetup.usePmeOnlyRanks)
1664 /* Split the communicator into a PP and PME part */
1665 cartSetup =
1666 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1667 ddRankSetup, ddCellIndex, pmeRanks);
1669 else
1671 /* All nodes do PP and PME */
1672 /* We do not require separate communicators */
1673 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1675 cartSetup.bCartesianPP = false;
1676 cartSetup.bCartesianPP_PME = false;
1679 return cartSetup;
1682 /*! \brief For PP ranks, sets or makes the communicator
1684 * For PME ranks, gets the rank id.
1685 * For PP-only ranks, sets the PME-only rank.
1687 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1688 const DDSettings &ddSettings,
1689 gmx::ArrayRef<const int> pmeRanks,
1690 t_commrec *cr,
1691 const int numAtomsInSystem,
1692 gmx_domdec_t *dd)
1694 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1695 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1697 if (thisRankHasDuty(cr, DUTY_PP))
1699 /* Copy or make a new PP communicator */
1701 /* We (possibly) reordered the nodes in split_communicator,
1702 * so it is no longer required in make_pp_communicator.
1704 const bool useCartesianReorder =
1705 (ddSettings.useCartesianReorder &&
1706 !cartSetup.bCartesianPP_PME);
1708 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1710 else
1712 receive_ddindex2simnodeid(dd, cr);
1715 if (!thisRankHasDuty(cr, DUTY_PME))
1717 /* Set up the communication to our PME node */
1718 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1719 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1720 if (debug)
1722 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1723 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1726 else
1728 dd->pme_nodeid = -1;
1731 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1732 if (MASTER(cr))
1734 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1735 numAtomsInSystem,
1736 numAtomsInSystem);
1740 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1741 const char *dir, int nc, const char *size_string)
1743 real *slb_frac, tot;
1744 int i, n;
1745 double dbl;
1747 slb_frac = nullptr;
1748 if (nc > 1 && size_string != nullptr)
1750 GMX_LOG(mdlog.info).appendTextFormatted(
1751 "Using static load balancing for the %s direction", dir);
1752 snew(slb_frac, nc);
1753 tot = 0;
1754 for (i = 0; i < nc; i++)
1756 dbl = 0;
1757 sscanf(size_string, "%20lf%n", &dbl, &n);
1758 if (dbl == 0)
1760 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1762 slb_frac[i] = dbl;
1763 size_string += n;
1764 tot += slb_frac[i];
1766 /* Normalize */
1767 std::string relativeCellSizes = "Relative cell sizes:";
1768 for (i = 0; i < nc; i++)
1770 slb_frac[i] /= tot;
1771 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1773 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1776 return slb_frac;
1779 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1781 int n = 0;
1782 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1783 int nmol;
1784 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1786 for (auto &ilist : extractILists(*ilists, IF_BOND))
1788 if (NRAL(ilist.functionType) > 2)
1790 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1795 return n;
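/* Editor's note (illustrative, not in the original source): NRAL() > 2 selects
 * interactions involving more than two atoms, so e.g. angles and dihedrals are
 * counted here while two-atom bonds and pairs are not.
 */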
1798 static int dd_getenv(const gmx::MDLogger &mdlog,
1799 const char *env_var, int def)
1801 char *val;
1802 int nst;
1804 nst = def;
1805 val = getenv(env_var);
1806 if (val)
1808 if (sscanf(val, "%20d", &nst) <= 0)
1810 nst = 1;
1812 GMX_LOG(mdlog.info).appendTextFormatted(
1813 "Found env.var. %s = %s, using value %d",
1814 env_var, val, nst);
1817 return nst;
1820 static void check_dd_restrictions(const gmx_domdec_t *dd,
1821 const t_inputrec *ir,
1822 const gmx::MDLogger &mdlog)
1824 if (ir->ePBC == epbcSCREW &&
1825 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1827 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1830 if (ir->nstlist == 0)
1832 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1835 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1837 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1841 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1842 const ivec numDomains)
1844 real r = ddbox.box_size[XX];
1845 for (int d = 0; d < DIM; d++)
1847 if (numDomains[d] > 1)
1849 /* Check using the initial average cell size */
1850 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1854 return r;
1857 /*! \brief Depending on the DLB initial value, returns the DLB switched-off state or issues an error.
1859 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1860 const std::string &reasonStr,
1861 const gmx::MDLogger &mdlog)
1863 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1864 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1866 if (cmdlineDlbState == DlbState::onUser)
1868 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1870 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1872 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1874 return DlbState::offForever;
1877 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1879 * This function parses the parameters of the "-dlb" command line option, setting
1880 * corresponding state values. Then it checks the consistency of the determined
1881 * state with other run parameters and settings. As a result, the initial state
1882 * may be altered or an error may be thrown if incompatibility of options is detected.
1884 * \param [in] mdlog Logger.
1885 * \param [in] dlbOption Enum value for the DLB option.
1886 * \param [in] bRecordLoad True if the load balancer is recording load information.
1887 * \param [in] mdrunOptions Options for mdrun.
1888 * \param [in] ir Pointer to mdrun input parameters.
1889 * \returns DLB initial/startup state.
1891 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1892 DlbOption dlbOption, gmx_bool bRecordLoad,
1893 const gmx::MdrunOptions &mdrunOptions,
1894 const t_inputrec *ir)
1896 DlbState dlbState = DlbState::offCanTurnOn;
1898 switch (dlbOption)
1900 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1901 case DlbOption::no: dlbState = DlbState::offUser; break;
1902 case DlbOption::yes: dlbState = DlbState::onUser; break;
1903 default: gmx_incons("Invalid dlbOption enum value");
1906 /* Reruns don't support DLB: bail or override auto mode */
1907 if (mdrunOptions.rerun)
1909 std::string reasonStr = "it is not supported in reruns.";
1910 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1913 /* Unsupported integrators */
1914 if (!EI_DYNAMICS(ir->eI))
1916 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1917 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1920 /* Without cycle counters we can't time work to balance on */
1921 if (!bRecordLoad)
1923 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1924 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1927 if (mdrunOptions.reproducible)
1929 std::string reasonStr = "you started a reproducible run.";
1930 switch (dlbState)
1932 case DlbState::offUser:
1933 break;
1934 case DlbState::offForever:
1935 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1936 break;
1937 case DlbState::offCanTurnOn:
1938 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1939 case DlbState::onCanTurnOff:
1940 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1941 break;
1942 case DlbState::onUser:
1943 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1944 default:
1945 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1949 return dlbState;
1952 static gmx_domdec_comm_t *init_dd_comm()
1954 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1956 comm->n_load_have = 0;
1957 comm->n_load_collect = 0;
1959 comm->haveTurnedOffDlb = false;
1961 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1963 comm->sum_nat[i] = 0;
1965 comm->ndecomp = 0;
1966 comm->nload = 0;
1967 comm->load_step = 0;
1968 comm->load_sum = 0;
1969 comm->load_max = 0;
1970 clear_ivec(comm->load_lim);
1971 comm->load_mdf = 0;
1972 comm->load_pme = 0;
1974 /* This should be replaced by a unique pointer */
1975 comm->balanceRegion = ddBalanceRegionAllocate();
1977 return comm;
1980 /* Returns whether mtop contains constraints and/or vsites */
1981 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1983 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1984 int nmol;
1985 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1987 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1989 return true;
1993 return false;
1996 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1997 const gmx_mtop_t &mtop,
1998 const t_inputrec &inputrec,
1999 const real cutoffMargin,
2000 DDSystemInfo *systemInfo)
2002 /* When we have constraints and/or vsites, it is beneficial to use
2003 * update groups (when possible) to allow independent update of groups.
2005 if (!systemHasConstraintsOrVsites(mtop))
2007 /* No constraints or vsites, atoms can be updated independently */
2008 return;
2011 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2012 systemInfo->useUpdateGroups =
2013 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2014 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2016 if (systemInfo->useUpdateGroups)
2018 int numUpdateGroups = 0;
2019 for (const auto &molblock : mtop.molblock)
2021 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2024 systemInfo->maxUpdateGroupRadius =
2025 computeMaxUpdateGroupRadius(mtop,
2026 systemInfo->updateGroupingPerMoleculetype,
2027 maxReferenceTemperature(inputrec));
2029 /* To use update groups, the large domain-to-domain cutoff distance
2030 * should be compatible with the box size.
2032 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2034 if (systemInfo->useUpdateGroups)
2036 GMX_LOG(mdlog.info).appendTextFormatted(
2037 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2038 numUpdateGroups,
2039 mtop.natoms/static_cast<double>(numUpdateGroups),
2040 systemInfo->maxUpdateGroupRadius);
2042 else
2044 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2045 systemInfo->updateGroupingPerMoleculetype.clear();
2050 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2051 npbcdim(ePBC2npbcdim(ir.ePBC)),
2052 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2053 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2054 haveScrewPBC(ir.ePBC == epbcSCREW)
2058 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2059 static bool
2060 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2061 const bool useUpdateGroups,
2062 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2064 if (useUpdateGroups)
2066 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2067 "Need one grouping per moltype");
2068 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2070 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2072 return false;
2076 else
2078 for (const auto &moltype : mtop.moltype)
2080 if (moltype.atoms.nr > 1)
2082 return false;
2087 return true;
2090 /*! \brief Generate the simulation system information */
2091 static DDSystemInfo
2092 getSystemInfo(const gmx::MDLogger &mdlog,
2093 const t_commrec *cr,
2094 const DomdecOptions &options,
2095 const gmx_mtop_t &mtop,
2096 const t_inputrec &ir,
2097 const matrix box,
2098 gmx::ArrayRef<const gmx::RVec> xGlobal)
2100 const real tenPercentMargin = 1.1;
2102 DDSystemInfo systemInfo;
2104 /* We need to decide on update groups early, as this affects communication distances */
2105 systemInfo.useUpdateGroups = false;
2106 if (ir.cutoff_scheme == ecutsVERLET)
2108 real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2109 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2112 systemInfo.moleculesAreAlwaysWhole =
2113 moleculesAreAlwaysWhole(mtop,
2114 systemInfo.useUpdateGroups,
2115 systemInfo.updateGroupingPerMoleculetype);
2116 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2117 mtop.bIntermolecularInteractions);
2118 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2119 multi_body_bondeds_count(&mtop) > 0);
2121 if (systemInfo.useUpdateGroups)
2123 systemInfo.haveSplitConstraints = false;
2124 systemInfo.haveSplitSettles = false;
2126 else
2128 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2129 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2130 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2133 if (ir.rlist == 0)
2135 /* Set the cut-off to some very large value,
2136 * so we don't need if statements everywhere in the code.
2137 * We use sqrt, since the cut-off is squared in some places.
2139 systemInfo.cutoff = GMX_CUTOFF_INF;
2141 else
2143 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2145 systemInfo.minCutoffForMultiBody = 0;
2147 /* Determine the minimum cell size limit, affected by many factors */
2148 systemInfo.cellsizeLimit = 0;
2149 systemInfo.filterBondedCommunication = false;
2151 /* We do not allow home atoms to move beyond the neighboring domain
2152 * between domain decomposition steps, which limits the cell size.
2153 * Get an estimate of cell size limit due to atom displacement.
2154 * In most cases this is a large overestimate, because it assumes
2155 * non-interacting atoms.
2156 * We set the chance to 1 in a trillion steps.
2158 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2159 const real limitForAtomDisplacement =
2160 minCellSizeForAtomDisplacement(mtop, ir,
2161 systemInfo.updateGroupingPerMoleculetype,
2162 c_chanceThatAtomMovesBeyondDomain);
2163 GMX_LOG(mdlog.info).appendTextFormatted(
2164 "Minimum cell size due to atom displacement: %.3f nm",
2165 limitForAtomDisplacement);
2167 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2168 limitForAtomDisplacement);
2170 /* TODO: PME decomposition currently requires atoms not to be more than
2171 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2172 * In nearly all cases, limitForAtomDisplacement will be smaller
2173 * than 2/3*rlist, so the PME requirement is satisfied.
2174 * But it would be better for both correctness and performance
2175 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2176 * Note that we would need to improve the pairlist buffer case.
2179 if (systemInfo.haveInterDomainBondeds)
2181 if (options.minimumCommunicationRange > 0)
2183 systemInfo.minCutoffForMultiBody =
2184 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2185 if (options.useBondedCommunication)
2187 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2189 else
2191 systemInfo.cutoff = std::max(systemInfo.cutoff,
2192 systemInfo.minCutoffForMultiBody);
2195 else if (ir.bPeriodicMols)
2197 /* Cannot easily determine the required cut-off */
2198 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2199 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2201 else
2203 real r_2b, r_mb;
2205 if (MASTER(cr))
2207 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2208 options.checkBondedInteractions,
2209 &r_2b, &r_mb);
2211 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2212 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2214 /* We use an initial margin of 10% for the minimum cell size,
2215 * except when we are just below the non-bonded cut-off.
2217 if (options.useBondedCommunication)
2219 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2221 const real r_bonded = std::max(r_2b, r_mb);
2222 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2223 /* This is the (only) place where we turn on the filtering */
2224 systemInfo.filterBondedCommunication = true;
2226 else
2228 const real r_bonded = r_mb;
2229 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2230 systemInfo.cutoff);
2232 /* We determine cutoff_mbody later */
2233 systemInfo.increaseMultiBodyCutoff = true;
2235 else
2237 /* No special bonded communication,
2238 * simply increase the DD cut-off.
2240 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2241 systemInfo.cutoff = std::max(systemInfo.cutoff,
2242 systemInfo.minCutoffForMultiBody);
2245 GMX_LOG(mdlog.info).appendTextFormatted(
2246 "Minimum cell size due to bonded interactions: %.3f nm",
2247 systemInfo.minCutoffForMultiBody);
2249 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2250 systemInfo.minCutoffForMultiBody);
2253 systemInfo.constraintCommunicationRange = 0;
2254 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2256 /* There is a cell size limit due to the constraints (P-LINCS) */
2257 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2258 GMX_LOG(mdlog.info).appendTextFormatted(
2259 "Estimated maximum distance required for P-LINCS: %.3f nm",
2260 systemInfo.constraintCommunicationRange);
2261 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2263 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2266 else if (options.constraintCommunicationRange > 0)
2268 /* Here we do not check for systemInfo.haveSplitConstraints,
2269 * because one can also set a cell size limit for virtual sites only,
2270 * and at this point we don't know yet if there are inter-cg v-sites.
2272 GMX_LOG(mdlog.info).appendTextFormatted(
2273 "User supplied maximum distance required for P-LINCS: %.3f nm",
2274 options.constraintCommunicationRange);
2275 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2277 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2278 systemInfo.constraintCommunicationRange);
2280 return systemInfo;
2283 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2284 * implemented. */
2285 static void
2286 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2287 const t_commrec *cr,
2288 const DomdecOptions &options,
2289 const DDSettings &ddSettings,
2290 const DDSystemInfo &systemInfo,
2291 const real cellsizeLimit,
2292 const gmx_ddbox_t &ddbox)
2294 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2296 char buf[STRLEN];
2297 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2298 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2299 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2300 !bC ? "-rdd" : "-rcon",
2301 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2302 bC ? " or your LINCS settings" : "");
2304 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2305 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2306 "%s\n"
2307 "Look in the log file for details on the domain decomposition",
2308 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2309 cellsizeLimit,
2310 buf);
2313 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2314 if (acs < cellsizeLimit)
2316 if (options.numCells[XX] <= 0)
2318 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2320 else
2322 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2323 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2324 acs, cellsizeLimit);
2328 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2329 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2331 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2332 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2333 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2335 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2337 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2338 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2342 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2343 static DDRankSetup
2344 getDDRankSetup(const gmx::MDLogger &mdlog,
2345 t_commrec *cr,
2346 const DDGridSetup &ddGridSetup,
2347 const t_inputrec &ir)
2349 GMX_LOG(mdlog.info).appendTextFormatted(
2350 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2351 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2352 ddGridSetup.numPmeOnlyRanks);
2354 DDRankSetup ddRankSetup;
2356 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2357 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2359 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2360 if (ddRankSetup.usePmeOnlyRanks)
2362 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2364 else
2366 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2369 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2371 /* The following choices should match those
2372 * in comm_cost_est in domdec_setup.c.
2373 * Note that here the checks have to take into account
2374 * that the decomposition might occur in a different order than xyz
2375 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2376 * in which case they will not match those in comm_cost_est,
2377 * but since that is mainly for testing purposes that's fine.
2379 if (ddGridSetup.numDDDimensions >= 2 &&
2380 ddGridSetup.ddDimensions[0] == XX &&
2381 ddGridSetup.ddDimensions[1] == YY &&
2382 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2383 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2384 getenv("GMX_PMEONEDD") == nullptr)
2386 ddRankSetup.npmedecompdim = 2;
2387 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2388 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2390 else
2392 /* In case nc is 1 in both x and y we could still choose to
2393 * decompose pme in y instead of x, but we use x for simplicity.
2395 ddRankSetup.npmedecompdim = 1;
2396 if (ddGridSetup.ddDimensions[0] == YY)
2398 ddRankSetup.npmenodes_x = 1;
2399 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2401 else
2403 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2404 ddRankSetup.npmenodes_y = 1;
2407 GMX_LOG(mdlog.info).appendTextFormatted(
2408 "PME domain decomposition: %d x %d x %d",
2409 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2411 else
2413 ddRankSetup.npmedecompdim = 0;
2414 ddRankSetup.npmenodes_x = 0;
2415 ddRankSetup.npmenodes_y = 0;
2418 return ddRankSetup;
2421 /*! \brief Set the cell size and interaction limits */
2422 static void set_dd_limits(const gmx::MDLogger &mdlog,
2423 t_commrec *cr, gmx_domdec_t *dd,
2424 const DomdecOptions &options,
2425 const DDSettings &ddSettings,
2426 const DDSystemInfo &systemInfo,
2427 const DDGridSetup &ddGridSetup,
2428 const int numPPRanks,
2429 const gmx_mtop_t *mtop,
2430 const t_inputrec *ir,
2431 const gmx_ddbox_t &ddbox)
2433 gmx_domdec_comm_t *comm = dd->comm;
2434 comm->ddSettings = ddSettings;
2436 /* Initialize the GPU share count to 0; it might change later */
2437 comm->nrank_gpu_shared = 0;
2439 comm->dlbState = comm->ddSettings.initialDlbState;
2440 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2441 /* To consider turning DLB on after 2*nstlist steps we need to check
2442 * at partitioning count 3. Thus we need to increase the first count by 2.
2444 comm->ddPartioningCountFirstDlbOff += 2;
2446 comm->bPMELoadBalDLBLimits = FALSE;
2448 /* Allocate the charge group/atom sorting struct */
2449 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2451 comm->systemInfo = systemInfo;
2453 if (systemInfo.useUpdateGroups)
2455 /* Note: We would like to use dd->nnodes for the atom count estimate,
2456 * but that is not yet available here. This anyhow only affects
2457 * performance up to the second dd_partition_system call.
2459 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2460 comm->updateGroupsCog =
2461 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2462 systemInfo.updateGroupingPerMoleculetype,
2463 maxReferenceTemperature(*ir),
2464 homeAtomCountEstimate);
2467 /* Set the DD setup given by ddGridSetup */
2468 copy_ivec(ddGridSetup.numDomains, dd->nc);
2469 dd->ndim = ddGridSetup.numDDDimensions;
2470 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2472 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2474 snew(comm->slb_frac, DIM);
2475 if (isDlbDisabled(comm))
2477 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2478 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2479 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2482 /* Set the multi-body cut-off and cellsize limit for DLB */
2483 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2484 comm->cellsize_limit = systemInfo.cellsizeLimit;
2485 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2487 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2489 /* Set the bonded communication distance to halfway between
2490 * the minimum and the maximum,
2491 * since the extra communication cost is nearly zero.
2493 real acs = average_cellsize_min(ddbox, dd->nc);
2494 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2495 if (!isDlbDisabled(comm))
2497 /* Check if this does not limit the scaling */
2498 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2499 options.dlbScaling*acs);
2501 if (!systemInfo.filterBondedCommunication)
2503 /* Without bonded-communication filtering, do not go beyond the n.b. cut-off */
2504 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2505 if (comm->cellsize_limit >= systemInfo.cutoff)
2507 /* We don't lose a lot of efficiency
2508 * when increasing it to the n.b. cut-off.
2509 * It can even be slightly faster, because we need
2510 * fewer checks for the communication setup.
2512 comm->cutoff_mbody = systemInfo.cutoff;
2515 /* Check if we did not end up below our original limit */
2516 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2517 systemInfo.minCutoffForMultiBody);
2519 if (comm->cutoff_mbody > comm->cellsize_limit)
2521 comm->cellsize_limit = comm->cutoff_mbody;
2524 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2527 if (debug)
2529 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2530 "cellsize limit %f\n",
2531 gmx::boolToString(systemInfo.filterBondedCommunication),
2532 comm->cellsize_limit);
2535 if (MASTER(cr))
2537 check_dd_restrictions(dd, ir, mdlog);
2541 void dd_init_bondeds(FILE *fplog,
2542 gmx_domdec_t *dd,
2543 const gmx_mtop_t *mtop,
2544 const gmx_vsite_t *vsite,
2545 const t_inputrec *ir,
2546 gmx_bool bBCheck,
2547 cginfo_mb_t *cginfo_mb)
2549 gmx_domdec_comm_t *comm;
2551 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2553 comm = dd->comm;
2555 if (comm->systemInfo.filterBondedCommunication)
2557 /* Communicate atoms beyond the cut-off for bonded interactions */
2558 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2560 else
2562 /* Only communicate atoms based on cut-off */
2563 comm->bondedLinks = nullptr;
2567 static void writeSettings(gmx::TextWriter *log,
2568 gmx_domdec_t *dd,
2569 const gmx_mtop_t *mtop,
2570 const t_inputrec *ir,
2571 gmx_bool bDynLoadBal,
2572 real dlb_scale,
2573 const gmx_ddbox_t *ddbox)
2575 gmx_domdec_comm_t *comm;
2576 int d;
2577 ivec np;
2578 real limit, shrink;
2580 comm = dd->comm;
2582 if (bDynLoadBal)
2584 log->writeString("The maximum number of communication pulses is:");
2585 for (d = 0; d < dd->ndim; d++)
2587 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2589 log->ensureLineBreak();
2590 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2591 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2592 log->writeString("The allowed shrink of domain decomposition cells is:");
2593 for (d = 0; d < DIM; d++)
2595 if (dd->nc[d] > 1)
2597 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2599 shrink = 0;
2601 else
2603 shrink =
2604 comm->cellsize_min_dlb[d]/
2605 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2607 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2610 log->ensureLineBreak();
2612 else
2614 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2615 log->writeString("The initial number of communication pulses is:");
2616 for (d = 0; d < dd->ndim; d++)
2618 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2620 log->ensureLineBreak();
2621 log->writeString("The initial domain decomposition cell size is:");
2622 for (d = 0; d < DIM; d++)
2624 if (dd->nc[d] > 1)
2626 log->writeStringFormatted(" %c %.2f nm",
2627 dim2char(d), dd->comm->cellsize_min[d]);
2630 log->ensureLineBreak();
2631 log->writeLine();
2634 const bool haveInterDomainVsites =
2635 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2637 if (comm->systemInfo.haveInterDomainBondeds ||
2638 haveInterDomainVsites ||
2639 comm->systemInfo.haveSplitConstraints ||
2640 comm->systemInfo.haveSplitSettles)
2642 std::string decompUnits;
2643 if (comm->systemInfo.useUpdateGroups)
2645 decompUnits = "atom groups";
2647 else
2649 decompUnits = "atoms";
2652 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2653 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2655 if (bDynLoadBal)
2657 limit = dd->comm->cellsize_limit;
2659 else
2661 if (dd->unitCellInfo.ddBoxIsDynamic)
2663 log->writeLine("(the following are initial values, they could change due to box deformation)");
2665 limit = dd->comm->cellsize_min[XX];
2666 for (d = 1; d < DIM; d++)
2668 limit = std::min(limit, dd->comm->cellsize_min[d]);
2672 if (comm->systemInfo.haveInterDomainBondeds)
2674 log->writeLineFormatted("%40s %-7s %6.3f nm",
2675 "two-body bonded interactions", "(-rdd)",
2676 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2677 log->writeLineFormatted("%40s %-7s %6.3f nm",
2678 "multi-body bonded interactions", "(-rdd)",
2679 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2681 if (haveInterDomainVsites)
2683 log->writeLineFormatted("%40s %-7s %6.3f nm",
2684 "virtual site constructions", "(-rcon)", limit);
2686 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2688 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2689 1+ir->nProjOrder);
2690 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2691 separation.c_str(), "(-rcon)", limit);
2693 log->ensureLineBreak();
2697 static void logSettings(const gmx::MDLogger &mdlog,
2698 gmx_domdec_t *dd,
2699 const gmx_mtop_t *mtop,
2700 const t_inputrec *ir,
2701 real dlb_scale,
2702 const gmx_ddbox_t *ddbox)
2704 gmx::StringOutputStream stream;
2705 gmx::TextWriter log(&stream);
2706 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2707 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2710 log.ensureEmptyLine();
2711 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2713 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2715 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2718 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2719 gmx_domdec_t *dd,
2720 real dlb_scale,
2721 const t_inputrec *ir,
2722 const gmx_ddbox_t *ddbox)
2724 gmx_domdec_comm_t *comm;
2725 int d, dim, npulse, npulse_d_max, npulse_d;
2726 gmx_bool bNoCutOff;
2728 comm = dd->comm;
2730 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2732 /* Determine the maximum number of comm. pulses in one dimension */
2734 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2736 /* Determine the maximum required number of grid pulses */
2737 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2739 /* Only a single pulse is required */
2740 npulse = 1;
2742 else if (!bNoCutOff && comm->cellsize_limit > 0)
2744 /* We round down slightly here to avoid overhead due to the latency
2745 * of extra communication calls when the cut-off
2746 * would be only slightly longer than the cell size.
2747 * Later cellsize_limit is redetermined,
2748 * so we cannot miss interactions due to this rounding.
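 * For example, cutoff/cellsize_limit = 2.03 gives int(2.03 + 0.96) = 2 pulses
 * instead of 3, while 2.05 already gives int(3.01) = 3.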
2750 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2752 else
2754 /* There is no cell size limit */
2755 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2758 if (!bNoCutOff && npulse > 1)
2760 /* See if we can do with less pulses, based on dlb_scale */
2761 npulse_d_max = 0;
2762 for (d = 0; d < dd->ndim; d++)
2764 dim = dd->dim[d];
2765 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2766 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2767 npulse_d_max = std::max(npulse_d_max, npulse_d);
2769 npulse = std::min(npulse, npulse_d_max);
2772 /* This env var can override npulse */
2773 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2774 if (d > 0)
2776 npulse = d;
2779 comm->maxpulse = 1;
2780 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2781 for (d = 0; d < dd->ndim; d++)
2783 if (comm->ddSettings.request1DAnd1Pulse)
2785 comm->cd[d].np_dlb = 1;
2787 else
2789 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2790 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2792 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2794 comm->bVacDLBNoLimit = FALSE;
2798 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2799 if (!comm->bVacDLBNoLimit)
2801 comm->cellsize_limit = std::max(comm->cellsize_limit,
2802 comm->systemInfo.cutoff/comm->maxpulse);
2804 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2805 /* Set the minimum cell size for each DD dimension */
2806 for (d = 0; d < dd->ndim; d++)
2808 if (comm->bVacDLBNoLimit ||
2809 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2811 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2813 else
2815 comm->cellsize_min_dlb[dd->dim[d]] =
2816 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2819 if (comm->cutoff_mbody <= 0)
2821 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2823 if (isDlbOn(comm))
2825 set_dlb_limits(dd);
2829 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2831 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2834 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2836 /* If each molecule is a single charge group
2837 * or we use domain decomposition for each periodic dimension,
2838 * we do not need to take pbc into account for the bonded interactions.
2840 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2841 !(dd->nc[XX] > 1 &&
2842 dd->nc[YY] > 1 &&
2843 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2846 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2847 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2848 gmx_domdec_t *dd, real dlb_scale,
2849 const gmx_mtop_t *mtop, const t_inputrec *ir,
2850 const gmx_ddbox_t *ddbox)
2852 gmx_domdec_comm_t *comm = dd->comm;
2853 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2855 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2857 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2858 if (ddRankSetup.npmedecompdim >= 2)
2860 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2863 else
2865 ddRankSetup.numRanksDoingPme = 0;
2866 if (dd->pme_nodeid >= 0)
2868 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2869 "Can not have separate PME ranks without PME electrostatics");
2873 if (debug)
2875 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2877 if (!isDlbDisabled(comm))
2879 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2882 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2884 real vol_frac;
2885 if (ir->ePBC == epbcNONE)
2887 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2889 else
2891 vol_frac =
2892 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2894 if (debug)
2896 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2898 int natoms_tot = mtop->natoms;
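/* The global-to-local atom lookup is sized from the estimated fraction of the
 * system volume covered by the DD zones of one rank, which should keep
 * resizing of the lookup during the run rare.
 */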
2900 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2901 static_cast<int>(vol_frac*natoms_tot));
2904 /*! \brief Get some important DD parameters which can be modified by env.vars */
2905 static DDSettings
2906 getDDSettings(const gmx::MDLogger &mdlog,
2907 const DomdecOptions &options,
2908 const gmx::MdrunOptions &mdrunOptions,
2909 const t_inputrec &ir)
2911 DDSettings ddSettings;
2913 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2914 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2915 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2916 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2917 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2918 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2919 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2920 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2921 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2922 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
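/* All of the settings read above are developer/debugging overrides taken from
 * environment variables; in a normal run they keep their defaults. The initial
 * DLB state below additionally depends on the user-visible -dlb and mdrun
 * options.
 */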
2924 if (ddSettings.useSendRecv2)
2926 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2929 if (ddSettings.eFlop)
2931 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2932 ddSettings.recordLoad = true;
2934 else
2936 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2939 ddSettings.initialDlbState =
2940 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2941 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2942 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2944 return ddSettings;
2947 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2948 unitCellInfo(ir)
2952 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2954 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2955 * generally requires a larger box than other possible decompositions
2956 * with the same rank count, so the calling code might need to decide
2957 * what is the most appropriate way to run the simulation based on
2958 * whether such a DD is possible.
2960 * This function works like init_domain_decomposition(), but will not
2961 * give a fatal error, and only uses \c cr for communicating between
2962 * ranks.
2964 * It is safe to call before thread-MPI spawns ranks, so that
2965 * thread-MPI can decide whether and how to trigger the GPU halo
2966 * exchange code path. The number of PME ranks, if any, should be set
2967 * in \c options.numPmeRanks.
2969 static bool
2970 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2971 const t_commrec *cr,
2972 const int numRanksRequested,
2973 const DomdecOptions &options,
2974 const gmx_mtop_t &mtop,
2975 const t_inputrec &ir,
2976 const matrix box,
2977 gmx::ArrayRef<const gmx::RVec> xGlobal)
2979 // Ensure we don't write any output from this checking routine
2980 gmx::MDLogger dummyLogger;
2982 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2984 DDSettings ddSettings = ddSettingsOriginal;
2985 ddSettings.request1DAnd1Pulse = true;
2986 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2987 !isDlbDisabled(ddSettings.initialDlbState),
2988 options.dlbScaling, ir,
2989 systemInfo.cellsizeLimit);
2990 gmx_ddbox_t ddbox = {0};
2991 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2992 ddSettings, systemInfo, gridSetupCellsizeLimit,
2993 mtop, ir, box, xGlobal, &ddbox);
2995 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2997 return canMakeDDWith1DAnd1Pulse;
3000 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
3002 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
3003 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
3004 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
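// For example, nc = {4,1,1}: max = 4 = product, so one decomposition dimension;
// nc = {2,2,1}: max = 2 but product = 4, so more than one dimension.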
3006 const bool hasMax1Pulse =
3007 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3008 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3010 return decompositionHasOneDimension && hasMax1Pulse;
3014 namespace gmx
3017 // TODO once the functionality stabilizes, move this class and
3018 // supporting functionality into builder.cpp
3019 /*! \brief Impl class for DD builder */
3020 class DomainDecompositionBuilder::Impl
3022 public:
3023 //! Constructor
3024 Impl(const MDLogger &mdlog,
3025 t_commrec *cr,
3026 const DomdecOptions &options,
3027 const MdrunOptions &mdrunOptions,
3028 bool prefer1DAnd1Pulse,
3029 const gmx_mtop_t &mtop,
3030 const t_inputrec &ir,
3031 const matrix box,
3032 ArrayRef<const RVec> xGlobal);
3034 //! Build the resulting DD manager
3035 gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3037 //! Objects used in constructing and configuring DD
3038 //! {
3039 //! Logging object
3040 const MDLogger &mdlog_;
3041 //! Communication object
3042 t_commrec *cr_;
3043 //! User-supplied options configuring DD behavior
3044 const DomdecOptions options_;
3045 //! Global system topology
3046 const gmx_mtop_t &mtop_;
3047 //! User input values from the tpr file
3048 const t_inputrec &ir_;
3049 //! }
3051 //! Internal objects used in constructing DD
3052 //! {
3053 //! Settings combined from the user input
3054 DDSettings ddSettings_;
3055 //! Information derived from the simulation system
3056 DDSystemInfo systemInfo_;
3057 //! Box structure
3058 gmx_ddbox_t ddbox_ = { 0 };
3059 //! Organization of the DD grids
3060 DDGridSetup ddGridSetup_;
3061 //! Organization of the DD ranks
3062 DDRankSetup ddRankSetup_;
3063 //! Number of DD cells in each dimension
3064 ivec ddCellIndex_ = { 0, 0, 0 };
3065 //! IDs of PME-only ranks
3066 std::vector<int> pmeRanks_;
3067 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3068 CartesianRankSetup cartSetup_;
3069 //! }
3073 DomainDecompositionBuilder::Impl::Impl(const MDLogger &mdlog,
3074 t_commrec *cr,
3075 const DomdecOptions &options,
3076 const MdrunOptions &mdrunOptions,
3077 const bool prefer1DAnd1Pulse,
3078 const gmx_mtop_t &mtop,
3079 const t_inputrec &ir,
3080 const matrix box,
3081 ArrayRef<const RVec> xGlobal)
3082 : mdlog_(mdlog),
3083 cr_(cr),
3084 options_(options),
3085 mtop_(mtop),
3086 ir_(ir)
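// The construction phases below are: read the DD settings (possibly adjusted
// by environment variables), derive the system information, choose the DD
// grid, split the ranks into PP and PME duties, and create the group
// communicators.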
3088 GMX_LOG(mdlog_.info).appendTextFormatted(
3089 "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3091 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3093 if (prefer1DAnd1Pulse &&
3094 canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3095 mtop_, ir_, box, xGlobal))
3097 ddSettings_.request1DAnd1Pulse = true;
3100 if (ddSettings_.eFlop > 1)
3102 /* Ensure that we have different random flop counts on different ranks */
3103 srand(1 + cr_->nodeid);
3106 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3108 const int numRanksRequested = cr_->nnodes;
3109 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3111 // DD grid setup uses a different cell size limit for the
3112 // automated setup than the one in systemInfo_. The latter is used
3113 // in set_dd_limits() to configure DLB, for example.
3114 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3115 !isDlbDisabled(ddSettings_.initialDlbState),
3116 options_.dlbScaling, ir_,
3117 systemInfo_.cellsizeLimit);
3118 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3119 ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3120 mtop_, ir_, box, xGlobal, &ddbox_);
3121 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3123 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3125 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3127 /* Generate the group communicator, also decides the duty of each rank */
3128 cartSetup_ =
3129 makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3130 ddRankSetup_, cr_,
3131 ddCellIndex_, &pmeRanks_);
3134 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3136 gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3138 copy_ivec(ddCellIndex_, dd->ci);
3140 dd->comm = init_dd_comm();
3142 dd->comm->ddRankSetup = ddRankSetup_;
3143 dd->comm->cartesianRankSetup = cartSetup_;
3145 set_dd_limits(mdlog_, cr_, dd, options_,
3146 ddSettings_, systemInfo_,
3147 ddGridSetup_,
3148 ddRankSetup_.numPPRanks,
3149 &mtop_, &ir_,
3150 ddbox_);
3152 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3154 if (thisRankHasDuty(cr_, DUTY_PP))
3156 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3158 setup_neighbor_relations(dd);
3161 /* Set overallocation to avoid frequent reallocation of arrays */
3162 set_over_alloc_dd(TRUE);
3164 dd->atomSets = atomSets;
3166 return dd;
3169 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger &mdlog,
3170 t_commrec *cr,
3171 const DomdecOptions &options,
3172 const MdrunOptions &mdrunOptions,
3173 const bool prefer1DAnd1Pulse,
3174 const gmx_mtop_t &mtop,
3175 const t_inputrec &ir,
3176 const matrix box,
3177 ArrayRef<const RVec> xGlobal)
3178 : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3182 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3184 return impl_->build(atomSets);
3187 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3189 } // namespace gmx
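/* Returns whether a pair-list cut-off of cutoffRequested fits within the
 * current DD grid, i.e. whether the number of communication pulses it would
 * require along each decomposed dimension is still available on all ranks.
 */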
3191 static gmx_bool test_dd_cutoff(t_commrec *cr,
3192 const matrix box,
3193 gmx::ArrayRef<const gmx::RVec> x,
3194 real cutoffRequested)
3196 gmx_domdec_t *dd;
3197 gmx_ddbox_t ddbox;
3198 int d, dim, np;
3199 real inv_cell_size;
3200 int LocallyLimited;
3202 dd = cr->dd;
3204 set_ddbox(*dd, false, box, true, x, &ddbox);
3206 LocallyLimited = 0;
3208 for (d = 0; d < dd->ndim; d++)
3210 dim = dd->dim[d];
3212 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3213 if (dd->unitCellInfo.ddBoxIsDynamic)
3215 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3218 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
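/* np is the number of communication pulses needed along this dimension for
 * the requested cut-off: e.g. a 1.2 nm cut-off with 0.9 nm cells needs
 * 1 + int(1.2/0.9) = 2 pulses (safety margins and box skew aside).
 */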
3220 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3222 return FALSE;
3225 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3227 if (np > dd->comm->cd[d].np_dlb)
3229 return FALSE;
3232 /* If the current local cell size is smaller than the requested
3233 * cut-off, we could fix that here, but this gets very complicated.
3234 * Without fixing it here, we might actually need more checks.
3236 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3237 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3239 LocallyLimited = 1;
3244 if (!isDlbDisabled(dd->comm))
3246 /* If DLB is not active yet, we don't need to check the grid jumps.
3247 * Actually we shouldn't, because then the grid jump data is not set.
3249 if (isDlbOn(dd->comm) &&
3250 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3252 LocallyLimited = 1;
3255 gmx_sumi(1, &LocallyLimited, cr);
3257 if (LocallyLimited > 0)
3259 return FALSE;
3263 return TRUE;
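/* Checks whether the DD grid can accommodate cutoffRequested and, if so,
 * records it as the new system cut-off, which the communication setup then
 * uses at the next repartitioning.
 */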
3266 gmx_bool change_dd_cutoff(t_commrec *cr,
3267 const matrix box,
3268 gmx::ArrayRef<const gmx::RVec> x,
3269 real cutoffRequested)
3271 gmx_bool bCutoffAllowed;
3273 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3275 if (bCutoffAllowed)
3277 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3280 return bCutoffAllowed;