[gromacs.git] / src / gromacs / ewald / pme-pp.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions necessary for
 * managing the offload of long-ranged PME work to separate MPI ranks,
 * for computing energies and forces (Coulomb and LJ).
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
#include "gmxpre.h"

#include "config.h"

#include <math.h>
#include <stdio.h>
#include <string.h>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/sighandler.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
/*! \brief MPI Tags used to separate communication of different types of quantities */
enum {
    eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
    eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
    eCommType_CNB
};
//@{
/*! \brief Flags used to coordinate PP-PME communication and computation phases
 *
 * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
 * that the first six flags are exactly in this order.
 */
#define PP_PME_CHARGE         (1<<0)
#define PP_PME_CHARGEB        (1<<1)
#define PP_PME_SQRTC6         (1<<2)
#define PP_PME_SQRTC6B        (1<<3)
#define PP_PME_SIGMA          (1<<4)
#define PP_PME_SIGMAB         (1<<5)
#define PP_PME_COORD          (1<<6)
#define PP_PME_ENER_VIR       (1<<9)
#define PP_PME_FINISH         (1<<10)
#define PP_PME_SWITCHGRID     (1<<11)
#define PP_PME_RESETCOUNTERS  (1<<12)

#define PME_PP_SIGSTOP        (1<<0)
#define PME_PP_SIGSTOPNSS     (1<<1)
//@}
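/* Note on the flag layout: each B-state flag is the corresponding A-state
 * flag shifted left by one bit, so the free-energy B-state transfers can be
 * enabled with a single shift, e.g.
 *     flags  = PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA;
 *     flags |= (flags << 1);  // adds PP_PME_CHARGEB, PP_PME_SQRTC6B, PP_PME_SIGMAB
 * as done in gmx_pme_send_parameters() below. gmx_pme_recv_coeffs_coords()
 * likewise relies on the first six flags matching the eCommType order when
 * it selects the receive buffer with (PP_PME_CHARGE << q).
 */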
/*! \brief Master PP-PME communication data structure */
struct gmx_pme_pp {
#if GMX_MPI
    MPI_Comm     mpi_comm_mysim; /**< MPI communicator for this simulation */
#endif
    int          nnode;          /**< The number of PP nodes to communicate with */
    int         *node;           /**< The PP node ranks */
    int          node_peer;      /**< The peer PP node rank */
    int         *nat;            /**< The number of atoms for each PP node */
    //@{
    /**< Vectors of A- and B-state parameters used to transfer vectors to PME ranks */
    real        *chargeA;
    real        *chargeB;
    real        *sqrt_c6A;
    real        *sqrt_c6B;
    real        *sigmaA;
    real        *sigmaB;
    //@}
    rvec        *x;              /**< Vector of atom coordinates to transfer to PME ranks */
    rvec        *f;              /**< Vector of atom forces received from PME ranks */
    int          nalloc;         /**< Allocation size of transfer vectors (>= \p nat) */
#if GMX_MPI
    //@{
    /**< Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
    MPI_Request *req;
    MPI_Status  *stat;
    //@}
#endif
};
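/* Note: this structure is used on the PME side of the PP-PME communication
 * (see gmx_pme_recv_coeffs_coords() and gmx_pme_send_force_vir_ener() below).
 * It is allocated by gmx_pme_pp_init() and its per-atom transfer buffers are
 * grown on demand in gmx_pme_recv_coeffs_coords() whenever the total atom
 * count received from the PP ranks exceeds nalloc.
 */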
/*! \brief Helper struct for PP-PME communication of parameters */
struct gmx_pme_comm_n_box_t {
    int             natoms;       /**< Number of atoms */
    matrix          box;          /**< Box */
    int             maxshift_x;   /**< Maximum shift in x direction */
    int             maxshift_y;   /**< Maximum shift in y direction */
    real            lambda_q;     /**< Free-energy lambda for electrostatics */
    real            lambda_lj;    /**< Free-energy lambda for Lennard-Jones */
    unsigned int    flags;        /**< Control flags */
    gmx_int64_t     step;         /**< MD integration step number */
    //@{
    /*! \brief Used in PME grid tuning */
    ivec            grid_size;
    real            ewaldcoeff_q;
    real            ewaldcoeff_lj;
    //@}
};
/*! \brief Helper struct for PP-PME communication of virial and energy */
typedef struct {
    //@{
    /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
    matrix          vir_q;
    matrix          vir_lj;
    real            energy_q;
    real            energy_lj;
    real            dvdlambda_q;
    real            dvdlambda_lj;
    //@}
    float           cycles;    /**< Counter of CPU cycles used */
    gmx_stop_cond_t stop_cond; /**< Flag used in responding to an external signal to terminate */
} gmx_pme_comm_vir_ene_t;
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
{
    struct gmx_pme_pp *pme_pp;

    snew(pme_pp, 1);

#if GMX_MPI
    int rank;

    pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
    get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
    snew(pme_pp->nat, pme_pp->nnode);
    snew(pme_pp->req, eCommType_NR*pme_pp->nnode);
    snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
    pme_pp->nalloc = 0;
#else
    GMX_UNUSED_VALUE(cr);
#endif

    return pme_pp;
}
/*! \brief Block to wait for communication to PME ranks to complete
 *
 * This should be faster with a real non-blocking MPI implementation */
/* #define GMX_PME_DELAYED_WAIT */

static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
{
#if GMX_MPI
    if (dd->nreq_pme)
    {
        MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
        dd->nreq_pme = 0;
    }
#endif
}
/*! \brief Send data to PME ranks */
static void gmx_pme_send_coeffs_coords(t_commrec *cr, unsigned int flags,
                                       real gmx_unused *chargeA, real gmx_unused *chargeB,
                                       real gmx_unused *c6A, real gmx_unused *c6B,
                                       real gmx_unused *sigmaA, real gmx_unused *sigmaB,
                                       matrix box, rvec gmx_unused *x,
                                       real lambda_q, real lambda_lj,
                                       int maxshift_x, int maxshift_y,
                                       gmx_int64_t step)
{
    gmx_domdec_t         *dd;
    gmx_pme_comm_n_box_t *cnb;
    int                   n;

    dd = cr->dd;
    n  = dd->nat_home;

    if (debug)
    {
        fprintf(debug, "PP rank %d sending to PME rank %d: %d%s%s%s%s\n",
                cr->sim_nodeid, dd->pme_nodeid, n,
                (flags & PP_PME_CHARGE) ? " charges" : "",
                (flags & PP_PME_SQRTC6) ? " sqrtC6" : "",
                (flags & PP_PME_SIGMA) ? " sigma" : "",
                (flags & PP_PME_COORD) ? " coordinates" : "");
    }

#ifdef GMX_PME_DELAYED_WAIT
    /* We can not use cnb until pending communication has finished */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif

    if (dd->pme_receive_vir_ener)
    {
        /* Peer PP node: communicate all data */
        if (dd->cnb == nullptr)
        {
            snew(dd->cnb, 1);
        }
        cnb = dd->cnb;

        cnb->flags      = flags;
        cnb->natoms     = n;
        cnb->maxshift_x = maxshift_x;
        cnb->maxshift_y = maxshift_y;
        cnb->lambda_q   = lambda_q;
        cnb->lambda_lj  = lambda_lj;
        cnb->step       = step;
        if (flags & PP_PME_COORD)
        {
            copy_mat(box, cnb->box);
        }
#if GMX_MPI
        MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }
    else if (flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
    {
#if GMX_MPI
        /* Communicate only the number of atoms */
        MPI_Isend(&n, sizeof(n), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }

#if GMX_MPI
    if (n > 0)
    {
        if (flags & PP_PME_CHARGE)
        {
            MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_CHARGEB)
        {
            MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6)
        {
            MPI_Isend(c6A, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6A, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6B)
        {
            MPI_Isend(c6B, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6B, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMA)
        {
            MPI_Isend(sigmaA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMAB)
        {
            MPI_Isend(sigmaB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_COORD)
        {
            MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
                      dd->pme_nodeid, eCommType_COORD, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
    }

#ifndef GMX_PME_DELAYED_WAIT
    /* Wait for the data to arrive */
    /* We can skip this wait as we are sure x and q will not be modified
     * before the next call to gmx_pme_send_x_q or gmx_pme_receive_f.
     */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif
#endif
}
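/* Send the per-atom electrostatics and LJ-PME parameters to our PME rank.
 * Charges are sent when PME electrostatics is active, sqrt(C6) and sigma
 * when LJ-PME is active, and the B-state arrays are sent in addition when
 * free-energy perturbation is used for either interaction type.
 */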
void gmx_pme_send_parameters(t_commrec *cr,
                             const interaction_const_t *ic,
                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                             real *chargeA, real *chargeB,
                             real *sqrt_c6A, real *sqrt_c6B,
                             real *sigmaA, real *sigmaB,
                             int maxshift_x, int maxshift_y)
{
    unsigned int flags = 0;

    if (EEL_PME(ic->eeltype))
    {
        flags |= PP_PME_CHARGE;
    }
    if (EVDW_PME(ic->vdwtype))
    {
        flags |= (PP_PME_SQRTC6 | PP_PME_SIGMA);
    }
    if (bFreeEnergy_q || bFreeEnergy_lj)
    {
        /* Assumes that the B state flags are in the bits just above
         * the ones for the A state. */
        flags |= (flags << 1);
    }

    gmx_pme_send_coeffs_coords(cr, flags,
                               chargeA, chargeB,
                               sqrt_c6A, sqrt_c6B, sigmaA, sigmaB,
                               nullptr, nullptr, 0, 0, maxshift_x, maxshift_y, -1);
}
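/* Send the coordinates, box and lambda values to our PME rank for this step.
 * When bEnerVir is set, the PME rank is also asked to return energies and
 * the virial for this step (PP_PME_ENER_VIR).
 */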
void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                              real lambda_q, real lambda_lj,
                              gmx_bool bEnerVir,
                              gmx_int64_t step)
{
    unsigned int flags = PP_PME_COORD;
    if (bEnerVir)
    {
        flags |= PP_PME_ENER_VIR;
    }
    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                               box, x, lambda_q, lambda_lj, 0, 0, step);
}
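/* Tell our PME rank that the run is finishing (sends PP_PME_FINISH, which
 * makes gmx_pme_recv_coeffs_coords() return pmerecvqxFINISH).
 */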
void gmx_pme_send_finish(t_commrec *cr)
{
    unsigned int flags = PP_PME_FINISH;

    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, -1);
}
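/* Tell our PME rank to switch to a new grid size and Ewald coefficients,
 * used during PME grid tuning. Only the peer PP rank (the one that also
 * receives virial and energy) sends this message.
 */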
void gmx_pme_send_switchgrid(t_commrec gmx_unused *cr,
                             ivec gmx_unused       grid_size,
                             real gmx_unused       ewaldcoeff_q,
                             real gmx_unused       ewaldcoeff_lj)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_SWITCHGRID;
        copy_ivec(grid_size, cnb.grid_size);
        cnb.ewaldcoeff_q  = ewaldcoeff_q;
        cnb.ewaldcoeff_lj = ewaldcoeff_lj;

        /* We send this uncommon message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
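/* Tell our PME rank to reset its cycle counters at the given step.
 * As with the grid switch, only the peer PP rank sends this message.
 */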
void gmx_pme_send_resetcounters(t_commrec gmx_unused *cr, gmx_int64_t gmx_unused step)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_RESETCOUNTERS;
        cnb.step  = step;

        /* We send this uncommon message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
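/* Receive the next message(s) from the PP ranks on a PME-only rank.
 *
 * Blocks until a complete set of data has arrived and returns one of the
 * pmerecvqx* codes: pmerecvqxX when coordinates (and possibly new
 * parameters) are ready for a PME step, pmerecvqxSWITCHGRID or
 * pmerecvqxRESETCOUNTERS for the special messages, and pmerecvqxFINISH
 * when the run is ending.
 *
 * Minimal sketch of how a PME-only rank can drive this (illustrative only;
 * the actual loop, including grid-switch and counter-reset handling, lives
 * in the PME-only main routine):
 *
 *     int ret;
 *     do
 *     {
 *         ret = gmx_pme_recv_coeffs_coords(pme_pp, &natoms, ...);
 *         if (ret == pmerecvqxX)
 *         {
 *             // spread/solve/gather, then return forces, energy and virial:
 *             // gmx_pme_send_force_vir_ener(pme_pp, f, vir_q, energy_q, ...);
 *         }
 *     }
 *     while (ret != pmerecvqxFINISH);
 */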
int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                               int               *natoms,
                               real             **chargeA,
                               real             **chargeB,
                               real             **sqrt_c6A,
                               real             **sqrt_c6B,
                               real             **sigmaA,
                               real             **sigmaB,
                               matrix             box,
                               rvec             **x,
                               rvec             **f,
                               int               *maxshift_x,
                               int               *maxshift_y,
                               real              *lambda_q,
                               real              *lambda_lj,
                               gmx_bool          *bEnerVir,
                               gmx_int64_t       *step,
                               ivec               grid_size,
                               real              *ewaldcoeff_q,
                               real              *ewaldcoeff_lj,
                               bool              *atomSetChanged)
{
    int status = -1;
    int nat    = 0;

#if GMX_MPI
    unsigned int flags    = 0;
    int          messages = 0;

    do
    {
        gmx_pme_comm_n_box_t cnb;
        cnb.flags = 0;

        /* Receive the send count, box and time step from the peer PP node */
        MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                 pme_pp->node_peer, eCommType_CNB,
                 pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);

        /* We accumulate all received flags */
        flags |= cnb.flags;

        *step = cnb.step;

        if (debug)
        {
            fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
                    (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
        }

        if (cnb.flags & PP_PME_FINISH)
        {
            status = pmerecvqxFINISH;
        }

        if (cnb.flags & PP_PME_SWITCHGRID)
        {
            /* Special case, receive the new parameters and return */
            copy_ivec(cnb.grid_size, grid_size);
            *ewaldcoeff_q  = cnb.ewaldcoeff_q;
            *ewaldcoeff_lj = cnb.ewaldcoeff_lj;

            status = pmerecvqxSWITCHGRID;
        }

        if (cnb.flags & PP_PME_RESETCOUNTERS)
        {
            /* Special case, receive the step (set above) and return */
            status = pmerecvqxRESETCOUNTERS;
        }

        if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
        {
            *atomSetChanged = true;

            /* Receive the send counts from the other PP nodes */
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->node[sender] == pme_pp->node_peer)
                {
                    pme_pp->nat[sender] = cnb.natoms;
                }
                else
                {
                    MPI_Irecv(&(pme_pp->nat[sender]), sizeof(pme_pp->nat[0]),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_CNB,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                }
            }
            MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
            messages = 0;

            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                nat += pme_pp->nat[sender];
            }

            if (nat > pme_pp->nalloc)
            {
                pme_pp->nalloc = over_alloc_dd(nat);
                if (cnb.flags & PP_PME_CHARGE)
                {
                    srenew(pme_pp->chargeA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_CHARGEB)
                {
                    srenew(pme_pp->chargeB, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6)
                {
                    srenew(pme_pp->sqrt_c6A, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6B)
                {
                    srenew(pme_pp->sqrt_c6B, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMA)
                {
                    srenew(pme_pp->sigmaA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMAB)
                {
                    srenew(pme_pp->sigmaB, pme_pp->nalloc);
                }
                srenew(pme_pp->x, pme_pp->nalloc);
                srenew(pme_pp->f, pme_pp->nalloc);
            }

            /* maxshift is sent when the charges are sent */
            *maxshift_x = cnb.maxshift_x;
            *maxshift_y = cnb.maxshift_y;

            /* Receive the charges in place */
            for (int q = 0; q < eCommType_NR; q++)
            {
                real *charge_pp;

                if (!(cnb.flags & (PP_PME_CHARGE<<q)))
                {
                    continue;
                }
                switch (q)
                {
                    case eCommType_ChargeA: charge_pp = pme_pp->chargeA;  break;
                    case eCommType_ChargeB: charge_pp = pme_pp->chargeB;  break;
                    case eCommType_SQRTC6A: charge_pp = pme_pp->sqrt_c6A; break;
                    case eCommType_SQRTC6B: charge_pp = pme_pp->sqrt_c6B; break;
                    case eCommType_SigmaA:  charge_pp = pme_pp->sigmaA;   break;
                    case eCommType_SigmaB:  charge_pp = pme_pp->sigmaB;   break;
                    default: gmx_incons("Wrong eCommType");
                }
                nat = 0;
                for (int sender = 0; sender < pme_pp->nnode; sender++)
                {
                    if (pme_pp->nat[sender] > 0)
                    {
                        MPI_Irecv(charge_pp+nat,
                                  pme_pp->nat[sender]*sizeof(real),
                                  MPI_BYTE,
                                  pme_pp->node[sender], q,
                                  pme_pp->mpi_comm_mysim,
                                  &pme_pp->req[messages++]);
                        nat += pme_pp->nat[sender];
                        if (debug)
                        {
                            fprintf(debug, "Received from PP rank %d: %d %s\n",
                                    pme_pp->node[sender], pme_pp->nat[sender],
                                    (q == eCommType_ChargeA ||
                                     q == eCommType_ChargeB) ? "charges" : "params");
                        }
                    }
                }
            }
        }

        if (cnb.flags & PP_PME_COORD)
        {
            /* The box, FE flag and lambda are sent along with the coordinates */
            copy_mat(cnb.box, box);
            *lambda_q  = cnb.lambda_q;
            *lambda_lj = cnb.lambda_lj;
            *bEnerVir  = (cnb.flags & PP_PME_ENER_VIR);
            *step      = cnb.step;

            /* Receive the coordinates in place */
            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->nat[sender] > 0)
                {
                    MPI_Irecv(pme_pp->x[nat], pme_pp->nat[sender]*sizeof(rvec),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_COORD,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                    nat += pme_pp->nat[sender];
                    if (debug)
                    {
                        fprintf(debug, "Received from PP rank %d: %d "
                                "coordinates\n",
                                pme_pp->node[sender], pme_pp->nat[sender]);
                    }
                }
            }

            status = pmerecvqxX;
        }

        /* Wait for the coordinates and/or charges to arrive */
        MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
        messages = 0;
    }
    while (status == -1);
#else
    GMX_UNUSED_VALUE(box);
    GMX_UNUSED_VALUE(maxshift_x);
    GMX_UNUSED_VALUE(maxshift_y);
    GMX_UNUSED_VALUE(lambda_q);
    GMX_UNUSED_VALUE(lambda_lj);
    GMX_UNUSED_VALUE(bEnerVir);
    GMX_UNUSED_VALUE(step);
    GMX_UNUSED_VALUE(grid_size);
    GMX_UNUSED_VALUE(ewaldcoeff_q);
    GMX_UNUSED_VALUE(ewaldcoeff_lj);
    GMX_UNUSED_VALUE(atomSetChanged);

    status = pmerecvqxX;
#endif

    if (status == pmerecvqxX)
    {
        *natoms   = nat;
        *chargeA  = pme_pp->chargeA;
        *chargeB  = pme_pp->chargeB;
        *sqrt_c6A = pme_pp->sqrt_c6A;
        *sqrt_c6B = pme_pp->sqrt_c6B;
        *sigmaA   = pme_pp->sigmaA;
        *sigmaB   = pme_pp->sigmaB;
        *x        = pme_pp->x;
        *f        = pme_pp->f;
    }

    return status;
}
/*! \brief Receive virial and energy from PME rank */
static void receive_virial_energy(t_commrec *cr,
                                  matrix vir_q, real *energy_q,
                                  matrix vir_lj, real *energy_lj,
                                  real *dvdlambda_q, real *dvdlambda_lj,
                                  float *pme_cycles)
{
    gmx_pme_comm_vir_ene_t cve;

    if (cr->dd->pme_receive_vir_ener)
    {
        if (debug)
        {
            fprintf(debug,
                    "PP rank %d receiving from PME rank %d: virial and energy\n",
                    cr->sim_nodeid, cr->dd->pme_nodeid);
        }
#if GMX_MPI
        MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                 MPI_STATUS_IGNORE);
#else
        memset(&cve, 0, sizeof(cve));
#endif

        m_add(vir_q, cve.vir_q, vir_q);
        m_add(vir_lj, cve.vir_lj, vir_lj);
        *energy_q      = cve.energy_q;
        *energy_lj     = cve.energy_lj;
        *dvdlambda_q  += cve.dvdlambda_q;
        *dvdlambda_lj += cve.dvdlambda_lj;
        *pme_cycles    = cve.cycles;

        if (cve.stop_cond != gmx_stop_cond_none)
        {
            gmx_set_stop_condition(cve.stop_cond);
        }
    }
    else
    {
        *energy_q   = 0;
        *energy_lj  = 0;
        *pme_cycles = 0;
    }
}
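/* Receive the PME forces on a PP rank and add them to f. When this PP rank
 * is the peer of its PME rank, the virial, energies, dV/dlambda terms and
 * the PME cycle count are received as well via receive_virial_energy().
 */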
void gmx_pme_receive_f(t_commrec *cr,
                       rvec f[], matrix vir_q, real *energy_q,
                       matrix vir_lj, real *energy_lj,
                       real *dvdlambda_q, real *dvdlambda_lj,
                       float *pme_cycles)
{
#ifdef GMX_PME_DELAYED_WAIT
    /* Wait for the x request to finish */
    gmx_pme_send_coeffs_coords_wait(cr->dd);
#endif

    int natoms = cr->dd->nat_home;

    if (natoms > cr->dd->pme_recv_f_alloc)
    {
        cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
        srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
    }

#if GMX_MPI
    MPI_Recv(cr->dd->pme_recv_f_buf[0],
             natoms*sizeof(rvec), MPI_BYTE,
             cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
             MPI_STATUS_IGNORE);
#endif

    int nt = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, natoms);

    /* Note that we would like to avoid this conditional by putting it
     * into the omp pragma instead, but then we still take the full
     * omp parallel for overhead (at least with gcc5).
     */
    if (nt == 1)
    {
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }
    else
    {
#pragma omp parallel for num_threads(nt) schedule(static)
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }

    receive_virial_energy(cr, vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, pme_cycles);
}
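/* On a PME-only rank: send the computed forces back to all attached PP
 * ranks, and the virial, energies, dV/dlambda terms, cycle count and any
 * stop signal to the peer PP rank.
 */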
void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                 rvec gmx_unused *f,
                                 matrix vir_q, real energy_q,
                                 matrix vir_lj, real energy_lj,
                                 real dvdlambda_q, real dvdlambda_lj,
                                 float cycles)
{
#if GMX_MPI
    gmx_pme_comm_vir_ene_t cve;
    int                    messages, ind_start, ind_end;
    cve.cycles = cycles;

    /* Now the evaluated forces have to be transferred to the PP nodes */
    messages = 0;
    ind_end  = 0;
    for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
    {
        ind_start = ind_end;
        ind_end   = ind_start + pme_pp->nat[receiver];
        if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                      pme_pp->node[receiver], 0,
                      pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
        {
            gmx_comm("MPI_Isend failed in do_pmeonly");
        }
    }

    /* send virial and energy to our last PP node */
    copy_mat(vir_q, cve.vir_q);
    copy_mat(vir_lj, cve.vir_lj);
    cve.energy_q     = energy_q;
    cve.energy_lj    = energy_lj;
    cve.dvdlambda_q  = dvdlambda_q;
    cve.dvdlambda_lj = dvdlambda_lj;
    /* check for the signals to send back to a PP node */
    cve.stop_cond = gmx_get_stop_condition();

    cve.cycles = cycles;

    if (debug)
    {
        fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
                pme_pp->node_peer);
    }
    MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
              pme_pp->node_peer, 1,
              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);

    /* Wait for the forces to arrive */
    MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
#else
    gmx_call("MPI not enabled");
    GMX_UNUSED_VALUE(pme_pp);
    GMX_UNUSED_VALUE(f);
    GMX_UNUSED_VALUE(vir_q);
    GMX_UNUSED_VALUE(energy_q);
    GMX_UNUSED_VALUE(vir_lj);
    GMX_UNUSED_VALUE(energy_lj);
    GMX_UNUSED_VALUE(dvdlambda_q);
    GMX_UNUSED_VALUE(dvdlambda_lj);
    GMX_UNUSED_VALUE(cycles);
#endif
}