Make PBC type enumeration into PbcType enum class
gromacs.git / src/gromacs/domdec/domdec.cpp
1 /*
2 * This file is part of the GROMACS molecular simulation package.
    4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "domdec.h"
41 #include "config.h"
43 #include <cassert>
44 #include <cinttypes>
45 #include <climits>
46 #include <cmath>
47 #include <cstdio>
48 #include <cstdlib>
49 #include <cstring>
51 #include <algorithm>
52 #include <memory>
54 #include "gromacs/domdec/builder.h"
55 #include "gromacs/domdec/collect.h"
56 #include "gromacs/domdec/dlb.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec_network.h"
59 #include "gromacs/domdec/ga2la.h"
60 #include "gromacs/domdec/gpuhaloexchange.h"
61 #include "gromacs/domdec/options.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/gmxlib/network.h"
64 #include "gromacs/gmxlib/nrnb.h"
65 #include "gromacs/gpu_utils/gpu_utils.h"
66 #include "gromacs/hardware/hw_info.h"
67 #include "gromacs/listed_forces/manage_threading.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/calc_verletbuf.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/constraintrange.h"
73 #include "gromacs/mdlib/updategroups.h"
74 #include "gromacs/mdlib/vsite.h"
75 #include "gromacs/mdtypes/commrec.h"
76 #include "gromacs/mdtypes/forceoutput.h"
77 #include "gromacs/mdtypes/inputrec.h"
78 #include "gromacs/mdtypes/mdrunoptions.h"
79 #include "gromacs/mdtypes/state.h"
80 #include "gromacs/pbcutil/ishift.h"
81 #include "gromacs/pbcutil/pbc.h"
82 #include "gromacs/pulling/pull.h"
83 #include "gromacs/timing/wallcycle.h"
84 #include "gromacs/topology/block.h"
85 #include "gromacs/topology/idef.h"
86 #include "gromacs/topology/ifunc.h"
87 #include "gromacs/topology/mtop_lookup.h"
88 #include "gromacs/topology/mtop_util.h"
89 #include "gromacs/topology/topology.h"
90 #include "gromacs/utility/basedefinitions.h"
91 #include "gromacs/utility/basenetwork.h"
92 #include "gromacs/utility/cstringutil.h"
93 #include "gromacs/utility/exceptions.h"
94 #include "gromacs/utility/fatalerror.h"
95 #include "gromacs/utility/gmxmpi.h"
96 #include "gromacs/utility/logger.h"
97 #include "gromacs/utility/real.h"
98 #include "gromacs/utility/smalloc.h"
99 #include "gromacs/utility/strconvert.h"
100 #include "gromacs/utility/stringstream.h"
101 #include "gromacs/utility/stringutil.h"
102 #include "gromacs/utility/textwriter.h"
104 #include "atomdistribution.h"
105 #include "box.h"
106 #include "cellsizes.h"
107 #include "distribute.h"
108 #include "domdec_constraints.h"
109 #include "domdec_internal.h"
110 #include "domdec_setup.h"
111 #include "domdec_vsite.h"
112 #include "redistribute.h"
113 #include "utility.h"
115 // TODO remove this when moving domdec into gmx namespace
116 using gmx::DdRankOrder;
117 using gmx::DlbOption;
118 using gmx::DomdecOptions;
120 static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
122 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
123 #define DD_CGIBS 2
125 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
126 #define DD_FLAG_NRCG 65535
127 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
128 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
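/* Illustrative sketch of how these flags pack into one cggl_flag entry
 * (the exact field usage lives in redistribute.cpp; this is only an example):
 *   bits  0..15 : number of atoms in the group, extracted with (flag & DD_FLAG_NRCG)
 *   bit 16 + 2d : DD_FLAG_FW(d), the group moves forward along DD dimension d
 *   bit 17 + 2d : DD_FLAG_BW(d), the group moves backward along DD dimension d
 * e.g. DD_FLAG_FW(1) expands to 1 << 18 and DD_FLAG_BW(0) to 1 << 17.
 */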
130 /* The DD zone order */
131 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
132 { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
134 /* The non-bonded zone-pair setup for domain decomposition
135 * The first number is the i-zone, the second number the first j-zone seen by
136 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
137 * As is, this is for 3D decomposition, where there are 4 i-zones.
138 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
139 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
141 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
142 { 1, 3, 6 },
143 { 2, 5, 6 },
144 { 3, 5, 7 } };
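/* Reading the table above (worked example): the row { 1, 3, 6 } means i-zone 1
 * interacts with j-zones 3, 4 and 5 in a 3D decomposition. For fewer
 * decomposition dimensions setup_neighbor_relations() clips both bounds to the
 * number of zones, so with 2D decomposition (4 zones) the same row gives the
 * j-zone range [3,4), i.e. only j-zone 3.
 */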
146 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
148 xyz[XX] = ind / (nc[YY] * nc[ZZ]);
149 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
150 xyz[ZZ] = ind % nc[ZZ];
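/* Worked example: with nc = { 4, 3, 2 } and ind = 17 this gives
 *   xyz[XX] = 17 / (3*2)   = 2
 *   xyz[YY] = (17 / 2) % 3 = 2
 *   xyz[ZZ] = 17 % 2       = 1
 * i.e. the index runs fastest along z, the inverse of dd_index().
 */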
153 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
155 int ddnodeid = -1;
157 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
158 const int ddindex = dd_index(dd->numCells, c);
159 if (cartSetup.bCartesianPP_PME)
161 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
163 else if (cartSetup.bCartesianPP)
165 #if GMX_MPI
166 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
167 #endif
169 else
171 ddnodeid = ddindex;
174 return ddnodeid;
177 int ddglatnr(const gmx_domdec_t* dd, int i)
179 int atnr;
181 if (dd == nullptr)
183 atnr = i + 1;
185 else
187 if (i >= dd->comm->atomRanges.numAtomsTotal())
189 gmx_fatal(FARGS,
190 "glatnr called with %d, which is larger than the local number of atoms (%d)",
191 i, dd->comm->atomRanges.numAtomsTotal());
193 atnr = dd->globalAtomIndices[i] + 1;
196 return atnr;
199 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
201 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
202 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
205 void dd_store_state(gmx_domdec_t* dd, t_state* state)
207 int i;
209 if (state->ddp_count != dd->ddp_count)
211 gmx_incons("The MD state does not match the domain decomposition state");
214 state->cg_gl.resize(dd->ncg_home);
215 for (i = 0; i < dd->ncg_home; i++)
217 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
220 state->ddp_count_cg_gl = dd->ddp_count;
223 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
225 return &dd->comm->zones;
228 int dd_numAtomsZones(const gmx_domdec_t& dd)
230 return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
233 int dd_numHomeAtoms(const gmx_domdec_t& dd)
235 return dd.comm->atomRanges.numHomeAtoms();
238 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
240 /* We currently set mdatoms entries for all atoms:
241 * local + non-local + communicated for vsite + constraints
244 return dd->comm->atomRanges.numAtomsTotal();
247 int dd_natoms_vsite(const gmx_domdec_t* dd)
249 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
252 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
254 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
255 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
258 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
260 wallcycle_start(wcycle, ewcMOVEX);
262 int nzone, nat_tot;
263 gmx_domdec_comm_t* comm;
264 gmx_domdec_comm_dim_t* cd;
265 rvec shift = { 0, 0, 0 };
266 gmx_bool bPBC, bScrew;
268 comm = dd->comm;
270 nzone = 1;
271 nat_tot = comm->atomRanges.numHomeAtoms();
272 for (int d = 0; d < dd->ndim; d++)
274 bPBC = (dd->ci[dd->dim[d]] == 0);
275 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
276 if (bPBC)
278 copy_rvec(box[dd->dim[d]], shift);
280 cd = &comm->cd[d];
281 for (const gmx_domdec_ind_t& ind : cd->ind)
283 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
284 gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
285 int n = 0;
286 if (!bPBC)
288 for (int j : ind.index)
290 sendBuffer[n] = x[j];
291 n++;
294 else if (!bScrew)
296 for (int j : ind.index)
298 /* We need to shift the coordinates */
299 for (int d = 0; d < DIM; d++)
301 sendBuffer[n][d] = x[j][d] + shift[d];
303 n++;
306 else
308 for (int j : ind.index)
310 /* Shift x */
311 sendBuffer[n][XX] = x[j][XX] + shift[XX];
312 /* Rotate y and z.
313 * This operation requires a special shift force
314 * treatment, which is performed in calc_vir.
316 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
317 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
318 n++;
322 DDBufferAccess<gmx::RVec> receiveBufferAccess(
323 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
325 gmx::ArrayRef<gmx::RVec> receiveBuffer;
326 if (cd->receiveInPlace)
328 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
330 else
332 receiveBuffer = receiveBufferAccess.buffer;
334 /* Send and receive the coordinates */
335 ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
337 if (!cd->receiveInPlace)
339 int j = 0;
340 for (int zone = 0; zone < nzone; zone++)
342 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
344 x[i] = receiveBuffer[j++];
348 nat_tot += ind.nrecv[nzone + 1];
350 nzone += nzone;
353 wallcycle_stop(wcycle, ewcMOVEX);
356 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
358 wallcycle_start(wcycle, ewcMOVEF);
360 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
361 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
363 gmx_domdec_comm_t& comm = *dd->comm;
364 int nzone = comm.zones.n / 2;
365 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
366 for (int d = dd->ndim - 1; d >= 0; d--)
368 /* Only forces in domains near the PBC boundaries need to
369 consider PBC in the treatment of fshift */
370 const bool shiftForcesNeedPbc =
371 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
372 const bool applyScrewPbc =
373 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
374 /* Determine which shift vector we need */
375 ivec vis = { 0, 0, 0 };
376 vis[dd->dim[d]] = 1;
377 const int is = IVEC2IS(vis);
379 /* Loop over the pulses */
380 const gmx_domdec_comm_dim_t& cd = comm.cd[d];
381 for (int p = cd.numPulses() - 1; p >= 0; p--)
383 const gmx_domdec_ind_t& ind = cd.ind[p];
384 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
385 gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
387 nat_tot -= ind.nrecv[nzone + 1];
389 DDBufferAccess<gmx::RVec> sendBufferAccess(
390 comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
392 gmx::ArrayRef<gmx::RVec> sendBuffer;
393 if (cd.receiveInPlace)
395 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
397 else
399 sendBuffer = sendBufferAccess.buffer;
400 int j = 0;
401 for (int zone = 0; zone < nzone; zone++)
403 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
405 sendBuffer[j++] = f[i];
409 /* Communicate the forces */
410 ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
411 /* Add the received forces */
412 int n = 0;
413 if (!shiftForcesNeedPbc)
415 for (int j : ind.index)
417 for (int d = 0; d < DIM; d++)
419 f[j][d] += receiveBuffer[n][d];
421 n++;
424 else if (!applyScrewPbc)
426 for (int j : ind.index)
428 for (int d = 0; d < DIM; d++)
430 f[j][d] += receiveBuffer[n][d];
432 /* Add this force to the shift force */
433 for (int d = 0; d < DIM; d++)
435 fshift[is][d] += receiveBuffer[n][d];
437 n++;
440 else
442 for (int j : ind.index)
444 /* Rotate the force */
445 f[j][XX] += receiveBuffer[n][XX];
446 f[j][YY] -= receiveBuffer[n][YY];
447 f[j][ZZ] -= receiveBuffer[n][ZZ];
448 if (shiftForcesNeedPbc)
450 /* Add this force to the shift force */
451 for (int d = 0; d < DIM; d++)
453 fshift[is][d] += receiveBuffer[n][d];
456 n++;
460 nzone /= 2;
462 wallcycle_stop(wcycle, ewcMOVEF);
465 /* Convenience function for extracting a real buffer from an rvec buffer
467 * To reduce the number of temporary communication buffers and avoid
468 * cache polution, we reuse gmx::RVec buffers for storing reals.
469 * This functions return a real buffer reference with the same number
470 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
472 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
474 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
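/* Example: an ArrayRef<RVec> with 100 elements (300 reals of storage) yields
 * an ArrayRef<real> with 100 elements, i.e. only the first third of the
 * underlying storage is exposed. The callers below therefore provision the
 * RVec buffers with as many elements as they need reals.
 */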
477 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
479 int nzone, nat_tot;
480 gmx_domdec_comm_t* comm;
481 gmx_domdec_comm_dim_t* cd;
483 comm = dd->comm;
485 nzone = 1;
486 nat_tot = comm->atomRanges.numHomeAtoms();
487 for (int d = 0; d < dd->ndim; d++)
489 cd = &comm->cd[d];
490 for (const gmx_domdec_ind_t& ind : cd->ind)
492 /* Note: We provision for RVec instead of real, so a factor of 3
493 * more than needed. The buffer actually already has this size
494 * and we pass a plain pointer below, so this does not matter.
496 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
497 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
498 int n = 0;
499 for (int j : ind.index)
501 sendBuffer[n++] = v[j];
504 DDBufferAccess<gmx::RVec> receiveBufferAccess(
505 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
507 gmx::ArrayRef<real> receiveBuffer;
508 if (cd->receiveInPlace)
510 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
512 else
514 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
516 /* Send and receive the data */
517 ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
518 if (!cd->receiveInPlace)
520 int j = 0;
521 for (int zone = 0; zone < nzone; zone++)
523 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
525 v[i] = receiveBuffer[j++];
529 nat_tot += ind.nrecv[nzone + 1];
531 nzone += nzone;
535 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
537 int nzone, nat_tot;
538 gmx_domdec_comm_t* comm;
539 gmx_domdec_comm_dim_t* cd;
541 comm = dd->comm;
543 nzone = comm->zones.n / 2;
544 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
545 for (int d = dd->ndim - 1; d >= 0; d--)
547 cd = &comm->cd[d];
548 for (int p = cd->numPulses() - 1; p >= 0; p--)
550 const gmx_domdec_ind_t& ind = cd->ind[p];
552 /* Note: We provision for RVec instead of real, so a factor of 3
553 * more than needed. The buffer actually already has this size
554 * and we typecast, so this works as intended.
556 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
557 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
558 nat_tot -= ind.nrecv[nzone + 1];
560 DDBufferAccess<gmx::RVec> sendBufferAccess(
561 comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
563 gmx::ArrayRef<real> sendBuffer;
564 if (cd->receiveInPlace)
566 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
568 else
570 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
571 int j = 0;
572 for (int zone = 0; zone < nzone; zone++)
574 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
576 sendBuffer[j++] = v[i];
  580             /* Communicate the data */
581 ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
  582             /* Add the received values */
583 int n = 0;
584 for (int j : ind.index)
586 v[j] += receiveBuffer[n];
587 n++;
590 nzone /= 2;
594 real dd_cutoff_multibody(const gmx_domdec_t* dd)
596 const gmx_domdec_comm_t& comm = *dd->comm;
597 const DDSystemInfo& systemInfo = comm.systemInfo;
599 real r = -1;
600 if (systemInfo.haveInterDomainMultiBodyBondeds)
602 if (comm.cutoff_mbody > 0)
604 r = comm.cutoff_mbody;
606 else
608 /* cutoff_mbody=0 means we do not have DLB */
609 r = comm.cellsize_min[dd->dim[0]];
610 for (int di = 1; di < dd->ndim; di++)
612 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
614 if (comm.systemInfo.filterBondedCommunication)
616 r = std::max(r, comm.cutoff_mbody);
618 else
620 r = std::min(r, systemInfo.cutoff);
625 return r;
628 real dd_cutoff_twobody(const gmx_domdec_t* dd)
630 real r_mb;
632 r_mb = dd_cutoff_multibody(dd);
634 return std::max(dd->comm->systemInfo.cutoff, r_mb);
638 static void dd_cart_coord2pmecoord(const DDRankSetup& ddRankSetup,
639 const CartesianRankSetup& cartSetup,
640 const ivec coord,
641 ivec coord_pme)
643 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
644 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
645 copy_ivec(coord, coord_pme);
646 coord_pme[cartSetup.cartpmedim] =
647 nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
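/* Worked example (hypothetical layout): with 4 PP cells and 2 PME-only layers
 * along the PME dimension (nc = 4, ntot = 6), PP coordinates 0 and 1 map to
 * PME coordinate 4 and PP coordinates 2 and 3 map to PME coordinate 5, so the
 * PME-only layers are shared evenly between the PP layers.
 */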
650 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
651 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
653 const int npp = ddRankSetup.numPPRanks;
654 const int npme = ddRankSetup.numRanksDoingPme;
656 /* Here we assign a PME node to communicate with this DD node
657 * by assuming that the major index of both is x.
658 * We add npme/2 to obtain an even distribution.
660 return (ddCellIndex * npme + npme / 2) / npp;
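/* Worked example (hypothetical counts): with npp = 6 and npme = 2 the mapping
 * is (ddCellIndex*2 + 1)/6, so PP cells 0-2 use PME rank index 0 and PP cells
 * 3-5 use PME rank index 1.
 */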
663 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
665 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
667 int n = 0;
668 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
670 const int p0 = ddindex2pmeindex(ddRankSetup, i);
671 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
672 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
674 if (debug)
676 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
678 pmeRanks[n] = i + 1 + n;
679 n++;
683 return pmeRanks;
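/* Worked example (hypothetical counts): with 6 PP ranks and 2 PME-only ranks
 * this yields pmeRanks = { 3, 7 }, i.e. the global rank order is
 * PP PP PP PME PP PP PP PME, matching DdRankOrder::interleave.
 */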
686 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
688 gmx_domdec_t* dd;
689 ivec coords;
690 int slab;
692 dd = cr->dd;
693 coords[XX] = x;
694 coords[YY] = y;
695 coords[ZZ] = z;
696 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
698 return slab;
701 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
703 const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
704 ivec coords = { x, y, z };
705 int nodeid = -1;
707 if (cartSetup.bCartesianPP_PME)
709 #if GMX_MPI
710 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
711 #endif
713 else
715 int ddindex = dd_index(cr->dd->numCells, coords);
716 if (cartSetup.bCartesianPP)
718 nodeid = cartSetup.ddindex2simnodeid[ddindex];
720 else
722 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
724 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
726 else
728 nodeid = ddindex;
733 return nodeid;
736 static int dd_simnode2pmenode(const DDRankSetup& ddRankSetup,
737 const CartesianRankSetup& cartSetup,
738 gmx::ArrayRef<const int> pmeRanks,
739 const t_commrec gmx_unused* cr,
740 const int sim_nodeid)
742 int pmenode = -1;
744 /* This assumes a uniform x domain decomposition grid cell size */
745 if (cartSetup.bCartesianPP_PME)
747 #if GMX_MPI
748 ivec coord, coord_pme;
749 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
750 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
752 /* This is a PP rank */
753 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
754 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
756 #endif
758 else if (cartSetup.bCartesianPP)
760 if (sim_nodeid < ddRankSetup.numPPRanks)
762 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
765 else
767 /* This assumes DD cells with identical x coordinates
768 * are numbered sequentially.
770 if (pmeRanks.empty())
772 if (sim_nodeid < ddRankSetup.numPPRanks)
774 /* The DD index equals the nodeid */
775 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
778 else
780 int i = 0;
781 while (sim_nodeid > pmeRanks[i])
783 i++;
785 if (sim_nodeid < pmeRanks[i])
787 pmenode = pmeRanks[i];
792 return pmenode;
795 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
797 if (dd != nullptr)
799 return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
801 else
803 return { 1, 1 };
807 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
809 const DDRankSetup& ddRankSetup = cr->dd->comm->ddRankSetup;
810 const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
811 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
812 "This function should only be called when PME-only ranks are in use");
813 std::vector<int> ddranks;
814 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
816 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
818 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
820 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
822 if (cartSetup.bCartesianPP_PME)
824 ivec coord = { x, y, z };
825 ivec coord_pme;
826 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
827 if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
828 && cr->dd->ci[ZZ] == coord_pme[ZZ])
830 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
833 else
835 /* The slab corresponds to the nodeid in the PME group */
836 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
838 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
844 return ddranks;
847 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
849 gmx_bool bReceive = TRUE;
851 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
852 if (ddRankSetup.usePmeOnlyRanks)
854 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
855 if (cartSetup.bCartesianPP_PME)
857 #if GMX_MPI
858 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
859 ivec coords;
860 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
861 coords[cartSetup.cartpmedim]++;
862 if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
864 int rank;
865 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
866 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
868 /* This is not the last PP node for pmenode */
869 bReceive = FALSE;
872 #else
873 GMX_RELEASE_ASSERT(
874 false,
875 "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
876 #endif
878 else
880 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
881 if (cr->sim_nodeid + 1 < cr->nnodes
882 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
884 /* This is not the last PP node for pmenode */
885 bReceive = FALSE;
890 return bReceive;
893 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
895 gmx_domdec_comm_t* comm;
896 int i;
898 comm = dd->comm;
900 snew(*dim_f, dd->numCells[dim] + 1);
901 (*dim_f)[0] = 0;
902 for (i = 1; i < dd->numCells[dim]; i++)
904 if (comm->slb_frac[dim])
906 (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
908 else
910 (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
913 (*dim_f)[dd->numCells[dim]] = 1;
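/* Example boundaries (illustrative): with numCells[dim] = 4 and no slb_frac
 * this gives dim_f = { 0, 0.25, 0.5, 0.75, 1 }; with slb_frac[dim] =
 * { 0.3, 0.3, 0.2, 0.2 } it gives { 0, 0.3, 0.6, 0.8, 1 }.
 */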
916 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
918 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
920 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
922 ddpme->dim = YY;
924 else
926 ddpme->dim = dimind;
928 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
930 ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
932 if (ddpme->nslab <= 1)
934 return;
937 const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
938 /* Determine for each PME slab the PP location range for dimension dim */
939 snew(ddpme->pp_min, ddpme->nslab);
940 snew(ddpme->pp_max, ddpme->nslab);
941 for (int slab = 0; slab < ddpme->nslab; slab++)
943 ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
944 ddpme->pp_max[slab] = 0;
946 for (int i = 0; i < dd->nnodes; i++)
948 ivec xyz;
949 ddindex2xyz(dd->numCells, i, xyz);
950 /* For y only use our y/z slab.
951 * This assumes that the PME x grid size matches the DD grid size.
953 if (dimind == 0 || xyz[XX] == dd->ci[XX])
955 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
956 int slab;
957 if (dimind == 0)
959 slab = pmeindex / nso;
961 else
963 slab = pmeindex % ddpme->nslab;
965 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
966 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
970 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
973 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
975 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
977 if (ddRankSetup.ddpme[0].dim == XX)
979 return ddRankSetup.ddpme[0].maxshift;
981 else
983 return 0;
987 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
989 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
991 if (ddRankSetup.ddpme[0].dim == YY)
993 return ddRankSetup.ddpme[0].maxshift;
995 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
997 return ddRankSetup.ddpme[1].maxshift;
999 else
1001 return 0;
1005 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1007 return dd.comm->systemInfo.haveSplitConstraints;
1010 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1012 return dd.comm->systemInfo.useUpdateGroups;
1015 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1017 /* Note that the cycles value can be incorrect, either 0 or some
1018 * extremely large value, when our thread migrated to another core
1019 * with an unsynchronized cycle counter. If this happens less often
 1020  * than once per nstlist steps, this will not cause issues, since
1021 * we later subtract the maximum value from the sum over nstlist steps.
1022 * A zero count will slightly lower the total, but that's a small effect.
1023 * Note that the main purpose of the subtraction of the maximum value
1024 * is to avoid throwing off the load balancing when stalls occur due
 1025  * to e.g. system activity or network congestion.
1027 dd->comm->cycl[ddCycl] += cycles;
1028 dd->comm->cycl_n[ddCycl]++;
1029 if (cycles > dd->comm->cycl_max[ddCycl])
1031 dd->comm->cycl_max[ddCycl] = cycles;
1035 #if GMX_MPI
1036 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1038 MPI_Comm c_row;
1039 int dim, i, rank;
1040 ivec loc_c;
1041 gmx_bool bPartOfGroup = FALSE;
1043 dim = dd->dim[dim_ind];
1044 copy_ivec(loc, loc_c);
1045 for (i = 0; i < dd->numCells[dim]; i++)
1047 loc_c[dim] = i;
1048 rank = dd_index(dd->numCells, loc_c);
1049 if (rank == dd->rank)
1051 /* This process is part of the group */
1052 bPartOfGroup = TRUE;
1055 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1056 if (bPartOfGroup)
1058 dd->comm->mpi_comm_load[dim_ind] = c_row;
1059 if (!isDlbDisabled(dd->comm))
1061 DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1063 if (dd->ci[dim] == dd->master_ci[dim])
1065 /* This is the root process of this row */
1066 cellsizes.rowMaster = std::make_unique<RowMaster>();
1068 RowMaster& rowMaster = *cellsizes.rowMaster;
1069 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1070 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1071 rowMaster.isCellMin.resize(dd->numCells[dim]);
1072 if (dim_ind > 0)
1074 rowMaster.bounds.resize(dd->numCells[dim]);
1076 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1078 else
1080 /* This is not a root process, we only need to receive cell_f */
1081 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1084 if (dd->ci[dim] == dd->master_ci[dim])
1086 snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1090 #endif
1092 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1094 #if GMX_MPI
1095 int physicalnode_id_hash;
1096 gmx_domdec_t* dd;
1097 MPI_Comm mpi_comm_pp_physicalnode;
1099 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1101 /* Only ranks with short-ranged tasks (currently) use GPUs.
1102 * If we don't have GPUs assigned, there are no resources to share.
1104 return;
1107 physicalnode_id_hash = gmx_physicalnode_id_hash();
1109 dd = cr->dd;
1111 if (debug)
1113 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1114 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1115 physicalnode_id_hash, gpu_id);
1117 /* Split the PP communicator over the physical nodes */
1118 /* TODO: See if we should store this (before), as it's also used for
 1119  * the nodecomm summation.
1121 // TODO PhysicalNodeCommunicator could be extended/used to handle
1122 // the need for per-node per-group communicators.
1123 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1124 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1125 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1126 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1128 if (debug)
1130 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1133 /* Note that some ranks could share a GPU, while others don't */
1135 if (dd->comm->nrank_gpu_shared == 1)
1137 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1139 #else
1140 GMX_UNUSED_VALUE(cr);
1141 GMX_UNUSED_VALUE(gpu_id);
1142 #endif
1145 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1147 #if GMX_MPI
1148 int dim0, dim1, i, j;
1149 ivec loc;
1151 if (debug)
1153 fprintf(debug, "Making load communicators\n");
1156 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1157 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1159 if (dd->ndim == 0)
1161 return;
1164 clear_ivec(loc);
1165 make_load_communicator(dd, 0, loc);
1166 if (dd->ndim > 1)
1168 dim0 = dd->dim[0];
1169 for (i = 0; i < dd->numCells[dim0]; i++)
1171 loc[dim0] = i;
1172 make_load_communicator(dd, 1, loc);
1175 if (dd->ndim > 2)
1177 dim0 = dd->dim[0];
1178 for (i = 0; i < dd->numCells[dim0]; i++)
1180 loc[dim0] = i;
1181 dim1 = dd->dim[1];
1182 for (j = 0; j < dd->numCells[dim1]; j++)
1184 loc[dim1] = j;
1185 make_load_communicator(dd, 2, loc);
1190 if (debug)
1192 fprintf(debug, "Finished making load communicators\n");
1194 #endif
1197 /*! \brief Sets up the relation between neighboring domains and zones */
1198 static void setup_neighbor_relations(gmx_domdec_t* dd)
1200 int d, dim, m;
1201 ivec tmp, s;
1202 gmx_domdec_zones_t* zones;
1203 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1205 for (d = 0; d < dd->ndim; d++)
1207 dim = dd->dim[d];
1208 copy_ivec(dd->ci, tmp);
1209 tmp[dim] = (tmp[dim] + 1) % dd->numCells[dim];
1210 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1211 copy_ivec(dd->ci, tmp);
1212 tmp[dim] = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1213 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1214 if (debug)
1216 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1217 dd->neighbor[d][0], dd->neighbor[d][1]);
1221 int nzone = (1 << dd->ndim);
1222 int nizone = (1 << std::max(dd->ndim - 1, 0));
1223 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1225 zones = &dd->comm->zones;
1227 for (int i = 0; i < nzone; i++)
1229 m = 0;
1230 clear_ivec(zones->shift[i]);
1231 for (d = 0; d < dd->ndim; d++)
1233 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1237 zones->n = nzone;
1238 for (int i = 0; i < nzone; i++)
1240 for (d = 0; d < DIM; d++)
1242 s[d] = dd->ci[d] - zones->shift[i][d];
1243 if (s[d] < 0)
1245 s[d] += dd->numCells[d];
1247 else if (s[d] >= dd->numCells[d])
1249 s[d] -= dd->numCells[d];
1253 for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1255 GMX_RELEASE_ASSERT(
1256 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1257 "The first element for each ddNonbondedZonePairRanges should match its index");
1259 DDPairInteractionRanges iZone;
1260 iZone.iZoneIndex = iZoneIndex;
 1261         /* ddNonbondedZonePairRanges is set up for 3D decomposition, for fewer dimensions use only
1262 * j-zones up to nzone.
1264 iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1265 std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1266 for (dim = 0; dim < DIM; dim++)
1268 if (dd->numCells[dim] == 1)
1270 /* All shifts should be allowed */
1271 iZone.shift0[dim] = -1;
1272 iZone.shift1[dim] = 1;
1274 else
1276 /* Determine the min/max j-zone shift wrt the i-zone */
1277 iZone.shift0[dim] = 1;
1278 iZone.shift1[dim] = -1;
1279 for (int jZone : iZone.jZoneRange)
1281 int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1282 if (shift_diff < iZone.shift0[dim])
1284 iZone.shift0[dim] = shift_diff;
1286 if (shift_diff > iZone.shift1[dim])
1288 iZone.shift1[dim] = shift_diff;
1294 zones->iZones.push_back(iZone);
1297 if (!isDlbDisabled(dd->comm))
1299 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1302 if (dd->comm->ddSettings.recordLoad)
1304 make_load_communicators(dd);
1308 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1309 gmx_domdec_t* dd,
1310 t_commrec gmx_unused* cr,
1311 bool gmx_unused reorder)
1313 #if GMX_MPI
1314 gmx_domdec_comm_t* comm = dd->comm;
1315 CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1317 if (cartSetup.bCartesianPP)
1319 /* Set up cartesian communication for the particle-particle part */
1320 GMX_LOG(mdlog.info)
1321 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1322 dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
1324 ivec periods;
1325 for (int i = 0; i < DIM; i++)
1327 periods[i] = TRUE;
1329 MPI_Comm comm_cart;
1330 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
1331 &comm_cart);
1332 /* We overwrite the old communicator with the new cartesian one */
1333 cr->mpi_comm_mygroup = comm_cart;
1336 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1337 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1339 if (cartSetup.bCartesianPP_PME)
1341 /* Since we want to use the original cartesian setup for sim,
1342 * and not the one after split, we need to make an index.
1344 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1345 cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1346 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1347 /* Get the rank of the DD master,
1348 * above we made sure that the master node is a PP node.
1350 int rank;
1351 if (MASTER(cr))
1353 rank = dd->rank;
1355 else
1357 rank = 0;
1359 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1361 else if (cartSetup.bCartesianPP)
1363 if (!comm->ddRankSetup.usePmeOnlyRanks)
1365 /* The PP communicator is also
1366 * the communicator for this simulation
1368 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1370 cr->nodeid = dd->rank;
1372 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1374 /* We need to make an index to go from the coordinates
1375 * to the nodeid of this simulation.
1377 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1378 std::vector<int> buf(dd->nnodes);
1379 if (thisRankHasDuty(cr, DUTY_PP))
1381 buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1383 /* Communicate the ddindex to simulation nodeid index */
1384 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1385 cr->mpi_comm_mysim);
1387 /* Determine the master coordinates and rank.
1388 * The DD master should be the same node as the master of this sim.
1390 for (int i = 0; i < dd->nnodes; i++)
1392 if (cartSetup.ddindex2simnodeid[i] == 0)
1394 ddindex2xyz(dd->numCells, i, dd->master_ci);
1395 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1398 if (debug)
1400 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1403 else
1405 /* No Cartesian communicators */
1406 /* We use the rank in dd->comm->all as DD index */
1407 ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1408 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1409 dd->masterrank = 0;
1410 clear_ivec(dd->master_ci);
1412 #endif
1414 GMX_LOG(mdlog.info)
1415 .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1416 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1417 if (debug)
1419 fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1420 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1424 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1426 #if GMX_MPI
1427 CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1429 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1431 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1432 std::vector<int> buf(dd->nnodes);
1433 if (thisRankHasDuty(cr, DUTY_PP))
1435 buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1437 /* Communicate the ddindex to simulation nodeid index */
1438 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1439 cr->mpi_comm_mysim);
1441 #else
1442 GMX_UNUSED_VALUE(dd);
1443 GMX_UNUSED_VALUE(cr);
1444 #endif
1447 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1448 t_commrec* cr,
1449 const DdRankOrder ddRankOrder,
1450 bool gmx_unused reorder,
1451 const DDRankSetup& ddRankSetup,
1452 ivec ddCellIndex,
1453 std::vector<int>* pmeRanks)
1455 CartesianRankSetup cartSetup;
1457 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1458 cartSetup.bCartesianPP_PME = false;
1460 const ivec& numDDCells = ddRankSetup.numPPCells;
1461 /* Initially we set ntot to the number of PP cells */
1462 copy_ivec(numDDCells, cartSetup.ntot);
1464 if (cartSetup.bCartesianPP)
1466 const int numDDCellsTot = ddRankSetup.numPPRanks;
1467 bool bDiv[DIM];
1468 for (int i = 1; i < DIM; i++)
1470 bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1472 if (bDiv[YY] || bDiv[ZZ])
1474 cartSetup.bCartesianPP_PME = TRUE;
1475 /* If we have 2D PME decomposition, which is always in x+y,
1476 * we stack the PME only nodes in z.
1477 * Otherwise we choose the direction that provides the thinnest slab
1478 * of PME only nodes as this will have the least effect
1479 * on the PP communication.
1480 * But for the PME communication the opposite might be better.
1482 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1484 cartSetup.cartpmedim = ZZ;
1486 else
1488 cartSetup.cartpmedim = YY;
1490 cartSetup.ntot[cartSetup.cartpmedim] +=
1491 (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1493 else
1495 GMX_LOG(mdlog.info)
1496 .appendTextFormatted(
1497 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1498 "nx*nz (%d*%d)",
1499 ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1500 numDDCells[XX], numDDCells[ZZ]);
1501 GMX_LOG(mdlog.info)
1502 .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1506 if (cartSetup.bCartesianPP_PME)
1508 #if GMX_MPI
1509 int rank;
1510 ivec periods;
1512 GMX_LOG(mdlog.info)
1513 .appendTextFormatted(
1514 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1515 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1517 for (int i = 0; i < DIM; i++)
1519 periods[i] = TRUE;
1521 MPI_Comm comm_cart;
1522 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1523 &comm_cart);
1524 MPI_Comm_rank(comm_cart, &rank);
1525 if (MASTER(cr) && rank != 0)
1527 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 1530         /* With this assignment we lose the link to the original communicator,
 1531          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1533 cr->mpi_comm_mysim = comm_cart;
1534 cr->sim_nodeid = rank;
1536 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1538 GMX_LOG(mdlog.info)
1539 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1540 ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1542 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1544 cr->duty = DUTY_PP;
1546 if (!ddRankSetup.usePmeOnlyRanks
1547 || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1549 cr->duty = DUTY_PME;
1552 /* Split the sim communicator into PP and PME only nodes */
1553 MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1554 dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1555 #else
1556 GMX_UNUSED_VALUE(ddCellIndex);
1557 #endif
1559 else
1561 switch (ddRankOrder)
1563 case DdRankOrder::pp_pme:
1564 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1565 break;
1566 case DdRankOrder::interleave:
1567 /* Interleave the PP-only and PME-only ranks */
1568 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1569 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1570 break;
1571 case DdRankOrder::cartesian: break;
1572 default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1575 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1577 cr->duty = DUTY_PME;
1579 else
1581 cr->duty = DUTY_PP;
1583 #if GMX_MPI
1584 /* Split the sim communicator into PP and PME only nodes */
1585 MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1586 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1587 #endif
1590 GMX_LOG(mdlog.info)
1591 .appendTextFormatted("This rank does only %s work.\n",
1592 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1594 return cartSetup;
1597 /*! \brief Makes the PP communicator and the PME communicator, when needed
1599 * Returns the Cartesian rank setup.
1600 * Sets \p cr->mpi_comm_mygroup
1601 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1602 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1604 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1605 const DDSettings& ddSettings,
1606 const DdRankOrder ddRankOrder,
1607 const DDRankSetup& ddRankSetup,
1608 t_commrec* cr,
1609 ivec ddCellIndex,
1610 std::vector<int>* pmeRanks)
1612 CartesianRankSetup cartSetup;
1614 if (ddRankSetup.usePmeOnlyRanks)
1616 /* Split the communicator into a PP and PME part */
1617 cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1618 ddRankSetup, ddCellIndex, pmeRanks);
1620 else
1622 /* All nodes do PP and PME */
1623 /* We do not require separate communicators */
1624 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1626 cartSetup.bCartesianPP = false;
1627 cartSetup.bCartesianPP_PME = false;
1630 return cartSetup;
1633 /*! \brief For PP ranks, sets or makes the communicator
1635 * For PME ranks get the rank id.
1636 * For PP only ranks, sets the PME-only rank.
1638 static void setupGroupCommunication(const gmx::MDLogger& mdlog,
1639 const DDSettings& ddSettings,
1640 gmx::ArrayRef<const int> pmeRanks,
1641 t_commrec* cr,
1642 const int numAtomsInSystem,
1643 gmx_domdec_t* dd)
1645 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1646 const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1648 if (thisRankHasDuty(cr, DUTY_PP))
1650 /* Copy or make a new PP communicator */
1652 /* We (possibly) reordered the nodes in split_communicator,
1653 * so it is no longer required in make_pp_communicator.
1655 const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1657 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1659 else
1661 receive_ddindex2simnodeid(dd, cr);
1664 if (!thisRankHasDuty(cr, DUTY_PME))
 1666         /* Set up the communication to our PME node */
1667 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1668 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1669 if (debug)
1671 fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1672 gmx::boolToString(dd->pme_receive_vir_ener));
1675 else
1677 dd->pme_nodeid = -1;
1680 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1681 if (MASTER(cr))
1683 dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1687 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1689 real * slb_frac, tot;
1690 int i, n;
1691 double dbl;
1693 slb_frac = nullptr;
1694 if (nc > 1 && size_string != nullptr)
1696 GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1697 snew(slb_frac, nc);
1698 tot = 0;
1699 for (i = 0; i < nc; i++)
1701 dbl = 0;
1702 sscanf(size_string, "%20lf%n", &dbl, &n);
1703 if (dbl == 0)
1705 gmx_fatal(FARGS,
1706 "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1707 dir, size_string);
1709 slb_frac[i] = dbl;
1710 size_string += n;
1711 tot += slb_frac[i];
1713 /* Normalize */
1714 std::string relativeCellSizes = "Relative cell sizes:";
1715 for (i = 0; i < nc; i++)
1717 slb_frac[i] /= tot;
1718 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1720 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1723 return slb_frac;
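/* Example (hypothetical input): dir = "x", nc = 4 and size_string = "1 1 2 1"
 * gives tot = 5 and thus relative cell sizes 0.200 0.200 0.400 0.200.
 */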
1726 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1728 int n = 0;
1729 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1730 int nmol;
1731 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1733 for (auto& ilist : extractILists(*ilists, IF_BOND))
1735 if (NRAL(ilist.functionType) > 2)
1737 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1742 return n;
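/* Example: a harmonic bond (2 atoms) is not counted, while an angle (3 atoms)
 * or a proper dihedral (4 atoms) is, provided the function type carries the
 * IF_BOND flag that extractILists() filters on above.
 */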
1745 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1747 char* val;
1748 int nst;
1750 nst = def;
1751 val = getenv(env_var);
1752 if (val)
1754 if (sscanf(val, "%20d", &nst) <= 0)
1756 nst = 1;
1758 GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1761 return nst;
1764 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1766 if (ir->pbcType == PbcType::Screw
1767 && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1769 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1770 c_pbcTypeNames[ir->pbcType].c_str());
1773 if (ir->nstlist == 0)
1775 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1778 if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1780 GMX_LOG(mdlog.warning)
1781 .appendText(
1782 "comm-mode angular will give incorrect results when the comm group "
1783 "partially crosses a periodic boundary");
1787 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1789 real r = ddbox.box_size[XX];
1790 for (int d = 0; d < DIM; d++)
1792 if (numDomains[d] > 1)
1794 /* Check using the initial average cell size */
1795 r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1799 return r;
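/* Worked example (hypothetical box): box_size = { 6, 6, 9 } nm with all
 * skew_fac = 1 and numDomains = { 3, 2, 1 } gives candidate sizes 2 nm (x)
 * and 3 nm (y); z is not decomposed, so the function returns 2 nm.
 */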
1802 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1804 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1805 const std::string& reasonStr,
1806 const gmx::MDLogger& mdlog)
1808 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1809 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1811 if (cmdlineDlbState == DlbState::onUser)
1813 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1815 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1817 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1819 return DlbState::offForever;
1822 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
 1824  * This function parses the "-dlb" command-line option and sets the
 1825  * corresponding state value. Then it checks the consistency of the determined
1826 * state with other run parameters and settings. As a result, the initial state
1827 * may be altered or an error may be thrown if incompatibility of options is detected.
1829 * \param [in] mdlog Logger.
1830 * \param [in] dlbOption Enum value for the DLB option.
1831 * \param [in] bRecordLoad True if the load balancer is recording load information.
1832 * \param [in] mdrunOptions Options for mdrun.
 1833  * \param [in] ir            Pointer to the mdrun input parameters.
1834 * \returns DLB initial/startup state.
1836 static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog,
1837 DlbOption dlbOption,
1838 gmx_bool bRecordLoad,
1839 const gmx::MdrunOptions& mdrunOptions,
1840 const t_inputrec* ir)
1842 DlbState dlbState = DlbState::offCanTurnOn;
1844 switch (dlbOption)
1846 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1847 case DlbOption::no: dlbState = DlbState::offUser; break;
1848 case DlbOption::yes: dlbState = DlbState::onUser; break;
1849 default: gmx_incons("Invalid dlbOption enum value");
1852 /* Reruns don't support DLB: bail or override auto mode */
1853 if (mdrunOptions.rerun)
1855 std::string reasonStr = "it is not supported in reruns.";
1856 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1859 /* Unsupported integrators */
1860 if (!EI_DYNAMICS(ir->eI))
1862 auto reasonStr = gmx::formatString(
1863 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1864 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1867 /* Without cycle counters we can't time work to balance on */
1868 if (!bRecordLoad)
1870 std::string reasonStr =
1871 "cycle counters unsupported or not enabled in the operating system kernel.";
1872 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1875 if (mdrunOptions.reproducible)
1877 std::string reasonStr = "you started a reproducible run.";
1878 switch (dlbState)
1880 case DlbState::offUser: break;
1881 case DlbState::offForever:
1882 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1883 break;
1884 case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1885 case DlbState::onCanTurnOff:
 1886                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1887 break;
1888 case DlbState::onUser:
1889 return forceDlbOffOrBail(
1890 dlbState,
1891 reasonStr
1892 + " In load balanced runs binary reproducibility cannot be "
1893 "ensured.",
1894 mdlog);
1895 default:
1896 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1897 static_cast<int>(dlbState));
1901 return dlbState;
1904 static gmx_domdec_comm_t* init_dd_comm()
1906 gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1908 comm->n_load_have = 0;
1909 comm->n_load_collect = 0;
1911 comm->haveTurnedOffDlb = false;
1913 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1915 comm->sum_nat[i] = 0;
1917 comm->ndecomp = 0;
1918 comm->nload = 0;
1919 comm->load_step = 0;
1920 comm->load_sum = 0;
1921 comm->load_max = 0;
1922 clear_ivec(comm->load_lim);
1923 comm->load_mdf = 0;
1924 comm->load_pme = 0;
1926 /* This should be replaced by a unique pointer */
1927 comm->balanceRegion = ddBalanceRegionAllocate();
1929 return comm;
1932 /* Returns whether mtop contains constraints and/or vsites */
1933 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1935 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1936 int nmol;
1937 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1939 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1941 return true;
1945 return false;
1948 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1949 const gmx_mtop_t& mtop,
1950 const t_inputrec& inputrec,
1951 const real cutoffMargin,
1952 DDSystemInfo* systemInfo)
1954 /* When we have constraints and/or vsites, it is beneficial to use
1955 * update groups (when possible) to allow independent update of groups.
1957 if (!systemHasConstraintsOrVsites(mtop))
1959 /* No constraints or vsites, atoms can be updated independently */
1960 return;
1963 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1964 systemInfo->useUpdateGroups = (!systemInfo->updateGroupingPerMoleculetype.empty()
1965 && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1967 if (systemInfo->useUpdateGroups)
1969 int numUpdateGroups = 0;
1970 for (const auto& molblock : mtop.molblock)
1972 numUpdateGroups += molblock.nmol
1973 * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1976 systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1977 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
1979 /* To use update groups, the large domain-to-domain cutoff distance
1980 * should be compatible with the box size.
1982 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1984 if (systemInfo->useUpdateGroups)
1986 GMX_LOG(mdlog.info)
1987 .appendTextFormatted(
1988 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1989 "nm\n",
1990 numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
1991 systemInfo->maxUpdateGroupRadius);
1993 else
1995 GMX_LOG(mdlog.info)
1996 .appendTextFormatted(
1997 "The combination of rlist and box size prohibits the use of update "
1998 "groups\n");
1999 systemInfo->updateGroupingPerMoleculetype.clear();
2004 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2005 npbcdim(numPbcDimensions(ir.pbcType)),
2006 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2007 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2008 haveScrewPBC(ir.pbcType == PbcType::Screw)
2012 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2013 static bool moleculesAreAlwaysWhole(const gmx_mtop_t& mtop,
2014 const bool useUpdateGroups,
2015 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2017 if (useUpdateGroups)
2019 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2020 "Need one grouping per moltype");
2021 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2023 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2025 return false;
2029 else
2031 for (const auto& moltype : mtop.moltype)
2033 if (moltype.atoms.nr > 1)
2035 return false;
2040 return true;
2043 /*! \brief Generate the simulation system information */
2044 static DDSystemInfo getSystemInfo(const gmx::MDLogger& mdlog,
2045 const t_commrec* cr,
2046 const DomdecOptions& options,
2047 const gmx_mtop_t& mtop,
2048 const t_inputrec& ir,
2049 const matrix box,
2050 gmx::ArrayRef<const gmx::RVec> xGlobal)
2052 const real tenPercentMargin = 1.1;
2054 DDSystemInfo systemInfo;
2056 /* We need to decide on update groups early, as this affects communication distances */
2057 systemInfo.useUpdateGroups = false;
2058 if (ir.cutoff_scheme == ecutsVERLET)
2060 real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2061 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2064 systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2065 mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2066 systemInfo.haveInterDomainBondeds =
2067 (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2068 systemInfo.haveInterDomainMultiBodyBondeds =
2069 (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2071 if (systemInfo.useUpdateGroups)
2073 systemInfo.haveSplitConstraints = false;
2074 systemInfo.haveSplitSettles = false;
2076 else
2078 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2079 || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2080 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2083 if (ir.rlist == 0)
2085 /* Set the cut-off to some very large value,
2086 * so we don't need if statements everywhere in the code.
2087 * We use sqrt, since the cut-off is squared in some places.
2089 systemInfo.cutoff = GMX_CUTOFF_INF;
2091 else
2093 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2095 systemInfo.minCutoffForMultiBody = 0;
2097 /* Determine the minimum cell size limit, affected by many factors */
2098 systemInfo.cellsizeLimit = 0;
2099 systemInfo.filterBondedCommunication = false;
2101 /* We do not allow home atoms to move beyond the neighboring domain
2102 * between domain decomposition steps, which limits the cell size.
2103 * Get an estimate of the cell size limit due to atom displacement.
2104 * In most cases this is a large overestimate, because it assumes
2105 * non-interacting atoms.
2106 * We set the chance to 1 in a trillion steps.
2108 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2109 const real limitForAtomDisplacement = minCellSizeForAtomDisplacement(
2110 mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2111 GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2113 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
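/* The bonded-interaction and constraint limits determined below are combined
 * into cellsizeLimit with std::max in the same way, so the final limit is the
 * largest of the three contributions.
 */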
2115 /* TODO: PME decomposition currently requires atoms not to be more than
2116 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2117 * In nearly all cases, limitForAtomDisplacement will be smaller
2118 * than 2/3*rlist, so the PME requirement is satisfied.
2119 * But it would be better for both correctness and performance
2120 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2121 * Note that we would need to improve the pairlist buffer case.
2124 if (systemInfo.haveInterDomainBondeds)
2126 if (options.minimumCommunicationRange > 0)
2128 systemInfo.minCutoffForMultiBody =
2129 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
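/* When bonded communication is in use, filtering is only activated if the
 * requested range exceeds the plain communication cut-off; otherwise the
 * cut-off itself is simply enlarged to cover the requested range.
 */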
2130 if (options.useBondedCommunication)
2132 systemInfo.filterBondedCommunication =
2133 (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2135 else
2137 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2140 else if (ir.bPeriodicMols)
2142 /* Cannot easily determine the required cut-off */
2143 GMX_LOG(mdlog.warning)
2144 .appendText(
2145 "NOTE: Periodic molecules are present in this system. Because of this, "
2146 "the domain decomposition algorithm cannot easily determine the "
2147 "minimum cell size that it requires for treating bonded interactions. "
2148 "Instead, domain decomposition will assume that half the non-bonded "
2149 "cut-off will be a suitable lower bound.");
2150 systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2152 else
2154 real r_2b, r_mb;
2156 if (MASTER(cr))
2158 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2159 options.checkBondedInteractions, &r_2b, &r_mb);
2161 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2162 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2164 /* We use an initial margin of 10% for the minimum cell size,
2165 * except when we are just below the non-bonded cut-off.
2167 if (options.useBondedCommunication)
2169 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2171 const real r_bonded = std::max(r_2b, r_mb);
2172 systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2173 /* This is the (only) place where we turn on the filtering */
2174 systemInfo.filterBondedCommunication = true;
2176 else
2178 const real r_bonded = r_mb;
2179 systemInfo.minCutoffForMultiBody =
2180 std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2182 /* We determine cutoff_mbody later */
2183 systemInfo.increaseMultiBodyCutoff = true;
2185 else
2187 /* No special bonded communication,
2188 * simply increase the DD cut-off.
2190 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2191 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
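/* Illustrative numbers only: with r_2b = 0.9 nm, r_mb = 1.1 nm and a 1.2 nm
 * cut-off, the 10% margin gives minCutoffForMultiBody = 1.21 nm and the DD
 * cut-off above is enlarged to 1.21 nm as well.
 */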
2194 GMX_LOG(mdlog.info)
2195 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2196 systemInfo.minCutoffForMultiBody);
2198 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2201 systemInfo.constraintCommunicationRange = 0;
2202 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2204 /* There is a cell size limit due to the constraints (P-LINCS) */
2205 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2206 GMX_LOG(mdlog.info)
2207 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2208 systemInfo.constraintCommunicationRange);
2209 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2211 GMX_LOG(mdlog.info)
2212 .appendText(
2213 "This distance will limit the DD cell size, you can override this with "
2214 "-rcon");
2217 else if (options.constraintCommunicationRange > 0)
2219 /* Here we do not check for dd->splitConstraints,
2220 * because one can also set a cell size limit for virtual sites only
2221 * and at this point we don't know yet if there are intercg v-sites.
2223 GMX_LOG(mdlog.info)
2224 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2225 options.constraintCommunicationRange);
2226 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2228 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2230 return systemInfo;
2233 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2234 * implemented. */
2235 static void checkDDGridSetup(const DDGridSetup& ddGridSetup,
2236 const t_commrec* cr,
2237 const DomdecOptions& options,
2238 const DDSettings& ddSettings,
2239 const DDSystemInfo& systemInfo,
2240 const real cellsizeLimit,
2241 const gmx_ddbox_t& ddbox)
2243 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2245 char buf[STRLEN];
2246 gmx_bool bC = (systemInfo.haveSplitConstraints
2247 && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2248 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2249 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2250 bC ? " or your LINCS settings" : "");
2252 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2253 "There is no domain decomposition for %d ranks that is compatible "
2254 "with the given box and a minimum cell size of %g nm\n"
2255 "%s\n"
2256 "Look in the log file for details on the domain decomposition",
2257 cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2260 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2261 if (acs < cellsizeLimit)
2263 if (options.numCells[XX] <= 0)
2265 GMX_RELEASE_ASSERT(
2266 false,
2267 "dd_choose_grid() should return a grid that satisfies the cell size limits");
2269 else
2271 gmx_fatal_collective(
2272 FARGS, cr->mpi_comm_mysim, MASTER(cr),
2273 "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2274 "options -dd, -rdd or -rcon, see the log file for details",
2275 acs, cellsizeLimit);
2279 const int numPPRanks =
2280 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2281 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2283 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2284 "The size of the domain decomposition grid (%d) does not match the "
2285 "number of PP ranks (%d). The total number of ranks is %d",
2286 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2288 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2290 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2291 "The number of separate PME ranks (%d) is larger than the number of "
2292 "PP ranks (%d), this is not supported.",
2293 ddGridSetup.numPmeOnlyRanks, numPPRanks);
2297 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2298 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2299 t_commrec* cr,
2300 const DDGridSetup& ddGridSetup,
2301 const t_inputrec& ir)
2303 GMX_LOG(mdlog.info)
2304 .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2305 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2306 ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2308 DDRankSetup ddRankSetup;
2310 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2311 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2313 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2314 if (ddRankSetup.usePmeOnlyRanks)
2316 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2318 else
2320 ddRankSetup.numRanksDoingPme =
2321 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2324 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2326 /* The following choices should match those
2327 * in comm_cost_est in domdec_setup.cpp.
2328 * Note that here the checks have to take into account
2329 * that the decomposition might occur in a different order than xyz
2330 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2331 * in which case they will not match those in comm_cost_est,
2332 * but since that is mainly for testing purposes that's fine.
2334 if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2335 && ddGridSetup.ddDimensions[1] == YY
2336 && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2337 && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2338 && getenv("GMX_PMEONEDD") == nullptr)
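/* Illustrative numbers only: a 4x3x2 domain grid with 8 ranks doing PME work
 * that decomposes along x and y gives npmenodes_x = 4 and npmenodes_y = 2.
 */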
2340 ddRankSetup.npmedecompdim = 2;
2341 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2342 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2344 else
2346 /* In case nc is 1 in both x and y we could still choose to
2347 * decompose pme in y instead of x, but we use x for simplicity.
2349 ddRankSetup.npmedecompdim = 1;
2350 if (ddGridSetup.ddDimensions[0] == YY)
2352 ddRankSetup.npmenodes_x = 1;
2353 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2355 else
2357 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2358 ddRankSetup.npmenodes_y = 1;
2361 GMX_LOG(mdlog.info)
2362 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2363 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2365 else
2367 ddRankSetup.npmedecompdim = 0;
2368 ddRankSetup.npmenodes_x = 0;
2369 ddRankSetup.npmenodes_y = 0;
2372 return ddRankSetup;
2375 /*! \brief Set the cell size and interaction limits */
2376 static void set_dd_limits(const gmx::MDLogger& mdlog,
2377 t_commrec* cr,
2378 gmx_domdec_t* dd,
2379 const DomdecOptions& options,
2380 const DDSettings& ddSettings,
2381 const DDSystemInfo& systemInfo,
2382 const DDGridSetup& ddGridSetup,
2383 const int numPPRanks,
2384 const gmx_mtop_t* mtop,
2385 const t_inputrec* ir,
2386 const gmx_ddbox_t& ddbox)
2388 gmx_domdec_comm_t* comm = dd->comm;
2389 comm->ddSettings = ddSettings;
2391 /* Initialize the GPU share count to 0; it might change later */
2392 comm->nrank_gpu_shared = 0;
2394 comm->dlbState = comm->ddSettings.initialDlbState;
2395 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2396 /* To consider turning DLB on after 2*nstlist steps we need to check
2397 * at partitioning count 3. Thus we need to increase the first count by 2.
2399 comm->ddPartioningCountFirstDlbOff += 2;
2401 comm->bPMELoadBalDLBLimits = FALSE;
2403 /* Allocate the charge group/atom sorting struct */
2404 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2406 comm->systemInfo = systemInfo;
2408 if (systemInfo.useUpdateGroups)
2410 /* Note: We would like to use dd->nnodes for the atom count estimate,
2411 * but that is not yet available here. This only affects
2412 * performance up to the second dd_partition_system call.
2414 const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2415 comm->updateGroupsCog = std::make_unique<gmx::UpdateGroupsCog>(
2416 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2417 homeAtomCountEstimate);
2420 /* Set the DD setup given by ddGridSetup */
2421 copy_ivec(ddGridSetup.numDomains, dd->numCells);
2422 dd->ndim = ddGridSetup.numDDDimensions;
2423 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2425 dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2427 snew(comm->slb_frac, DIM);
2428 if (isDlbDisabled(comm))
2430 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2431 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2432 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2435 /* Set the multi-body cut-off and cellsize limit for DLB */
2436 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2437 comm->cellsize_limit = systemInfo.cellsizeLimit;
2438 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2440 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2442 /* Set the bonded communication distance to halfway between
2443 * the minimum and the maximum,
2444 * since the extra communication cost is nearly zero.
2446 real acs = average_cellsize_min(ddbox, dd->numCells);
2447 comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2448 if (!isDlbDisabled(comm))
2450 /* Check if this does not limit the scaling */
2451 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2453 if (!systemInfo.filterBondedCommunication)
2455 /* Without bBondComm, do not go beyond the non-bonded cut-off */
2456 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2457 if (comm->cellsize_limit >= systemInfo.cutoff)
2459 /* We don't lose a lot of efficiency
2460 * when increasing it to the non-bonded cut-off.
2461 * It can even be slightly faster, because we need
2462 * fewer checks for the communication setup.
2464 comm->cutoff_mbody = systemInfo.cutoff;
2467 /* Check if we did not end up below our original limit */
2468 comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2470 if (comm->cutoff_mbody > comm->cellsize_limit)
2472 comm->cellsize_limit = comm->cutoff_mbody;
2475 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2478 if (debug)
2480 fprintf(debug,
2481 "Bonded atom communication beyond the cut-off: %s\n"
2482 "cellsize limit %f\n",
2483 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2486 if (MASTER(cr))
2488 check_dd_restrictions(dd, ir, mdlog);
2492 void dd_init_bondeds(FILE* fplog,
2493 gmx_domdec_t* dd,
2494 const gmx_mtop_t& mtop,
2495 const gmx_vsite_t* vsite,
2496 const t_inputrec* ir,
2497 gmx_bool bBCheck,
2498 gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
2500 gmx_domdec_comm_t* comm;
2502 dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2504 comm = dd->comm;
2506 if (comm->systemInfo.filterBondedCommunication)
2508 /* Communicate atoms beyond the cut-off for bonded interactions */
2509 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2511 else
2513 /* Only communicate atoms based on cut-off */
2514 comm->bondedLinks = nullptr;
2518 static void writeSettings(gmx::TextWriter* log,
2519 gmx_domdec_t* dd,
2520 const gmx_mtop_t* mtop,
2521 const t_inputrec* ir,
2522 gmx_bool bDynLoadBal,
2523 real dlb_scale,
2524 const gmx_ddbox_t* ddbox)
2526 gmx_domdec_comm_t* comm;
2527 int d;
2528 ivec np;
2529 real limit, shrink;
2531 comm = dd->comm;
2533 if (bDynLoadBal)
2535 log->writeString("The maximum number of communication pulses is:");
2536 for (d = 0; d < dd->ndim; d++)
2538 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2540 log->ensureLineBreak();
2541 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2542 comm->cellsize_limit);
2543 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2544 log->writeString("The allowed shrink of domain decomposition cells is:");
2545 for (d = 0; d < DIM; d++)
2547 if (dd->numCells[d] > 1)
2549 if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2551 shrink = 0;
2553 else
2555 shrink = comm->cellsize_min_dlb[d]
2556 / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2558 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2561 log->ensureLineBreak();
2563 else
2565 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2566 log->writeString("The initial number of communication pulses is:");
2567 for (d = 0; d < dd->ndim; d++)
2569 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2571 log->ensureLineBreak();
2572 log->writeString("The initial domain decomposition cell size is:");
2573 for (d = 0; d < DIM; d++)
2575 if (dd->numCells[d] > 1)
2577 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2580 log->ensureLineBreak();
2581 log->writeLine();
2584 const bool haveInterDomainVsites =
2585 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2587 if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2588 || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2590 std::string decompUnits;
2591 if (comm->systemInfo.useUpdateGroups)
2593 decompUnits = "atom groups";
2595 else
2597 decompUnits = "atoms";
2600 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2601 decompUnits.c_str());
2602 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "",
2603 comm->systemInfo.cutoff);
2605 if (bDynLoadBal)
2607 limit = dd->comm->cellsize_limit;
2609 else
2611 if (dd->unitCellInfo.ddBoxIsDynamic)
2613 log->writeLine(
2614 "(the following are initial values, they could change due to box "
2615 "deformation)");
2617 limit = dd->comm->cellsize_min[XX];
2618 for (d = 1; d < DIM; d++)
2620 limit = std::min(limit, dd->comm->cellsize_min[d]);
2624 if (comm->systemInfo.haveInterDomainBondeds)
2626 log->writeLineFormatted("%40s %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2627 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2628 log->writeLineFormatted("%40s %-7s %6.3f nm", "multi-body bonded interactions",
2629 "(-rdd)",
2630 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2631 ? comm->cutoff_mbody
2632 : std::min(comm->systemInfo.cutoff, limit));
2634 if (haveInterDomainVsites)
2636 log->writeLineFormatted("%40s %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2638 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2640 std::string separation =
2641 gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2642 log->writeLineFormatted("%40s %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2644 log->ensureLineBreak();
2648 static void logSettings(const gmx::MDLogger& mdlog,
2649 gmx_domdec_t* dd,
2650 const gmx_mtop_t* mtop,
2651 const t_inputrec* ir,
2652 real dlb_scale,
2653 const gmx_ddbox_t* ddbox)
2655 gmx::StringOutputStream stream;
2656 gmx::TextWriter log(&stream);
2657 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2658 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2661 log.ensureEmptyLine();
2662 log.writeLine(
2663 "When dynamic load balancing gets turned on, these settings will change to:");
2665 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2667 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2670 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2671 gmx_domdec_t* dd,
2672 real dlb_scale,
2673 const t_inputrec* ir,
2674 const gmx_ddbox_t* ddbox)
2676 gmx_domdec_comm_t* comm;
2677 int d, dim, npulse, npulse_d_max, npulse_d;
2678 gmx_bool bNoCutOff;
2680 comm = dd->comm;
2682 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2684 /* Determine the maximum number of comm. pulses in one dimension */
2686 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2688 /* Determine the maximum required number of grid pulses */
2689 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2691 /* Only a single pulse is required */
2692 npulse = 1;
2694 else if (!bNoCutOff && comm->cellsize_limit > 0)
2696 /* We round down slightly here to avoid overhead due to the latency
2697 * of extra communication calls when the cut-off
2698 * would be only slightly longer than the cell size.
2699 * Later cellsize_limit is redetermined,
2700 * so we cannot miss interactions due to this rounding.
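/* Illustrative numbers only: a 1.2 nm cut-off with a 0.5 nm cell size limit
 * gives npulse = 3, while a 1.02 nm cut-off with a 1.0 nm limit still gives a
 * single pulse because of the slight rounding down.
 */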
2702 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2704 else
2706 /* There is no cell size limit */
2707 npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2710 if (!bNoCutOff && npulse > 1)
2712 /* See if we can do with less pulses, based on dlb_scale */
2713 npulse_d_max = 0;
2714 for (d = 0; d < dd->ndim; d++)
2716 dim = dd->dim[d];
2717 npulse_d = static_cast<int>(
2719 1 + dd->numCells[dim] * comm->systemInfo.cutoff
2720 / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2721 npulse_d_max = std::max(npulse_d_max, npulse_d);
2723 npulse = std::min(npulse, npulse_d_max);
2726 /* This env var can override npulse */
2727 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2728 if (d > 0)
2730 npulse = d;
2733 comm->maxpulse = 1;
2734 comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2735 for (d = 0; d < dd->ndim; d++)
2737 if (comm->ddSettings.request1DAnd1Pulse)
2739 comm->cd[d].np_dlb = 1;
2741 else
2743 comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2744 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2746 if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2748 comm->bVacDLBNoLimit = FALSE;
2752 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2753 if (!comm->bVacDLBNoLimit)
2755 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2757 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2758 /* Set the minimum cell size for each DD dimension */
2759 for (d = 0; d < dd->ndim; d++)
2761 if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2763 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2765 else
2767 comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2770 if (comm->cutoff_mbody <= 0)
2772 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2774 if (isDlbOn(comm))
2776 set_dlb_limits(dd);
2780 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2782 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2785 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2787 /* If each molecule is a single charge group
2788 * or we use domain decomposition for each periodic dimension,
2789 * we do not need to take pbc into account for the bonded interactions.
2791 return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2792 && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2793 && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2796 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2797 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2798 gmx_domdec_t* dd,
2799 real dlb_scale,
2800 const gmx_mtop_t* mtop,
2801 const t_inputrec* ir,
2802 const gmx_ddbox_t* ddbox)
2804 gmx_domdec_comm_t* comm = dd->comm;
2805 DDRankSetup& ddRankSetup = comm->ddRankSetup;
2807 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2809 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2810 if (ddRankSetup.npmedecompdim >= 2)
2812 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2815 else
2817 ddRankSetup.numRanksDoingPme = 0;
2818 if (dd->pme_nodeid >= 0)
2820 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2821 "Can not have separate PME ranks without PME electrostatics");
2825 if (debug)
2827 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2829 if (!isDlbDisabled(comm))
2831 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2834 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2836 real vol_frac;
2837 if (ir->pbcType == PbcType::No)
2839 vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2841 else
2843 vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2844 / static_cast<double>(dd->nnodes);
2846 if (debug)
2848 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2850 int natoms_tot = mtop->natoms;
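/* vol_frac*natoms_tot serves as a size estimate for the global-to-local atom
 * lookup: roughly the number of home plus communicated atoms this rank is
 * expected to hold.
 */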
2852 dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2855 /*! \brief Get some important DD parameters which can be modified by env.vars */
2856 static DDSettings getDDSettings(const gmx::MDLogger& mdlog,
2857 const DomdecOptions& options,
2858 const gmx::MdrunOptions& mdrunOptions,
2859 const t_inputrec& ir)
2861 DDSettings ddSettings;
2863 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2864 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2865 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2866 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2867 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2868 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2869 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2870 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2871 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2872 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2874 if (ddSettings.useSendRecv2)
2876 GMX_LOG(mdlog.info)
2877 .appendText(
2878 "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2879 "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2880 "communication");
2883 if (ddSettings.eFlop)
2885 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2886 ddSettings.recordLoad = true;
2888 else
2890 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2893 ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2894 ddSettings.recordLoad, mdrunOptions, &ir);
2895 GMX_LOG(mdlog.info)
2896 .appendTextFormatted("Dynamic load balancing: %s",
2897 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2899 return ddSettings;
2902 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2904 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2906 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2907 * generally requires a larger box than other possible decompositions
2908 * with the same rank count, so the calling code might need to decide
2909 * what is the most appropriate way to run the simulation based on
2910 * whether such a DD is possible.
2912 * This function works like init_domain_decomposition(), but will not
2913 * give a fatal error, and only uses \c cr for communicating between
2914 * ranks.
2916 * It is safe to call before thread-MPI spawns ranks, so that
2917 * thread-MPI can decide whether and how to trigger the GPU halo
2918 * exchange code path. The number of PME ranks, if any, should be set
2919 * in \c options.numPmeRanks.
2921 static bool canMake1DAnd1PulseDomainDecomposition(const DDSettings& ddSettingsOriginal,
2922 const t_commrec* cr,
2923 const int numRanksRequested,
2924 const DomdecOptions& options,
2925 const gmx_mtop_t& mtop,
2926 const t_inputrec& ir,
2927 const matrix box,
2928 gmx::ArrayRef<const gmx::RVec> xGlobal)
2930 // Ensure we don't write any output from this checking routine
2931 gmx::MDLogger dummyLogger;
2933 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2935 DDSettings ddSettings = ddSettingsOriginal;
2936 ddSettings.request1DAnd1Pulse = true;
2937 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
2938 dummyLogger, ddSettings.request1DAnd1Pulse, !isDlbDisabled(ddSettings.initialDlbState),
2939 options.dlbScaling, ir, systemInfo.cellsizeLimit);
2940 gmx_ddbox_t ddbox = { 0 };
2941 DDGridSetup ddGridSetup =
2942 getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2943 gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2945 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2947 return canMakeDDWith1DAnd1Pulse;
2950 bool is1DAnd1PulseDD(const gmx_domdec_t& dd)
2952 const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
2953 const int productOfDimensionSizes = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
2954 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
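/* This equality holds only when all but one dimension have a single domain:
 * e.g. 8x1x1 is 1D (max 8 == product 8), whereas 4x2x1 is not (max 4 != product 8).
 */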
2956 const bool hasMax1Pulse =
2957 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff)
2958 || (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
2960 return decompositionHasOneDimension && hasMax1Pulse;
2963 namespace gmx
2966 // TODO once the functionality stabilizes, move this class and
2967 // supporting functionality into builder.cpp
2968 /*! \brief Impl class for DD builder */
2969 class DomainDecompositionBuilder::Impl
2971 public:
2972 //! Constructor
2973 Impl(const MDLogger& mdlog,
2974 t_commrec* cr,
2975 const DomdecOptions& options,
2976 const MdrunOptions& mdrunOptions,
2977 bool prefer1DAnd1Pulse,
2978 const gmx_mtop_t& mtop,
2979 const t_inputrec& ir,
2980 const matrix box,
2981 ArrayRef<const RVec> xGlobal);
2983 //! Build the resulting DD manager
2984 gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2986 //! Objects used in constructing and configuring DD
2987 //! {
2988 //! Logging object
2989 const MDLogger& mdlog_;
2990 //! Communication object
2991 t_commrec* cr_;
2992 //! User-supplied options configuring DD behavior
2993 const DomdecOptions options_;
2994 //! Global system topology
2995 const gmx_mtop_t& mtop_;
2996 //! User input values from the tpr file
2997 const t_inputrec& ir_;
2998 //! }
3000 //! Internal objects used in constructing DD
3001 //! {
3002 //! Settings combined from the user input
3003 DDSettings ddSettings_;
3004 //! Information derived from the simulation system
3005 DDSystemInfo systemInfo_;
3006 //! Box structure
3007 gmx_ddbox_t ddbox_ = { 0 };
3008 //! Organization of the DD grids
3009 DDGridSetup ddGridSetup_;
3010 //! Organization of the DD ranks
3011 DDRankSetup ddRankSetup_;
3012 //! Cartesian index of this rank's DD cell
3013 ivec ddCellIndex_ = { 0, 0, 0 };
3014 //! IDs of PME-only ranks
3015 std::vector<int> pmeRanks_;
3016 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3017 CartesianRankSetup cartSetup_;
3018 //! }
3021 DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
3022 t_commrec* cr,
3023 const DomdecOptions& options,
3024 const MdrunOptions& mdrunOptions,
3025 const bool prefer1DAnd1Pulse,
3026 const gmx_mtop_t& mtop,
3027 const t_inputrec& ir,
3028 const matrix box,
3029 ArrayRef<const RVec> xGlobal) :
3030 mdlog_(mdlog),
3031 cr_(cr),
3032 options_(options),
3033 mtop_(mtop),
3034 ir_(ir)
3036 GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3038 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3040 if (prefer1DAnd1Pulse
3041 && canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_,
3042 ir_, box, xGlobal))
3044 ddSettings_.request1DAnd1Pulse = true;
3047 if (ddSettings_.eFlop > 1)
3049 /* Ensure that we have different random flop counts on different ranks */
3050 srand(1 + cr_->nodeid);
3053 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3055 const int numRanksRequested = cr_->nnodes;
3056 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3058 // DD grid setup uses a different cell size limit for
3059 // automated setup than the one in systemInfo_. The latter is used
3060 // in set_dd_limits() to configure DLB, for example.
3061 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
3062 mdlog_, ddSettings_.request1DAnd1Pulse, !isDlbDisabled(ddSettings_.initialDlbState),
3063 options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3064 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3065 gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3066 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3068 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3070 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3072 /* Generate the group communicator, also decides the duty of each rank */
3073 cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3074 ddCellIndex_, &pmeRanks_);
3077 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3079 gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3081 copy_ivec(ddCellIndex_, dd->ci);
3083 dd->comm = init_dd_comm();
3085 dd->comm->ddRankSetup = ddRankSetup_;
3086 dd->comm->cartesianRankSetup = cartSetup_;
3088 set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3089 ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3091 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3093 if (thisRankHasDuty(cr_, DUTY_PP))
3095 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3097 setup_neighbor_relations(dd);
3100 /* Set overallocation to avoid frequent reallocation of arrays */
3101 set_over_alloc_dd(TRUE);
3103 dd->atomSets = atomSets;
3105 return dd;
3108 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& mdlog,
3109 t_commrec* cr,
3110 const DomdecOptions& options,
3111 const MdrunOptions& mdrunOptions,
3112 const bool prefer1DAnd1Pulse,
3113 const gmx_mtop_t& mtop,
3114 const t_inputrec& ir,
3115 const matrix box,
3116 ArrayRef<const RVec> xGlobal) :
3117 impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3121 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3123 return impl_->build(atomSets);
3126 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3128 } // namespace gmx
3130 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3132 gmx_domdec_t* dd;
3133 gmx_ddbox_t ddbox;
3134 int d, dim, np;
3135 real inv_cell_size;
3136 int LocallyLimited;
3138 dd = cr->dd;
3140 set_ddbox(*dd, false, box, true, x, &ddbox);
3142 LocallyLimited = 0;
3144 for (d = 0; d < dd->ndim; d++)
3146 dim = dd->dim[d];
3148 inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3149 if (dd->unitCellInfo.ddBoxIsDynamic)
3151 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3154 np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
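/* np is the number of communication pulses the requested cut-off would need in
 * this dimension; illustrative numbers only: ignoring the safety margins and
 * skew, a 2.0 nm cut-off with 4 cells in a 6 nm box (1.5 nm cells) needs 2 pulses.
 */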
3156 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3158 return FALSE;
3161 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3163 if (np > dd->comm->cd[d].np_dlb)
3165 return FALSE;
3168 /* If a current local cell size is smaller than the requested
3169 * cut-off, we could still fix it, but this gets very complicated.
3170 * Without fixing here, we might actually need more checks.
3172 real cellSizeAlongDim =
3173 (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3174 if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3176 LocallyLimited = 1;
3181 if (!isDlbDisabled(dd->comm))
3183 /* If DLB is not active yet, we don't need to check the grid jumps.
3184 * Actually we shouldn't, because then the grid jump data is not set.
3186 if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3188 LocallyLimited = 1;
3191 gmx_sumi(1, &LocallyLimited, cr);
3193 if (LocallyLimited > 0)
3195 return FALSE;
3199 return TRUE;
3202 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3204 gmx_bool bCutoffAllowed;
3206 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3208 if (bCutoffAllowed)
3210 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3213 return bCutoffAllowed;