[gromacs.git] / src / gromacs / ewald / pme-pp.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions necessary for
 * managing the offload of long-ranged PME work to separate MPI ranks,
 * for computing energies and forces (Coulomb and LJ).
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
#include "gmxpre.h"

#include "config.h"

#include <math.h>
#include <stdio.h>
#include <string.h>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/sighandler.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
/*! \brief MPI Tags used to separate communication of different types of quantities */
enum {
    eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
    eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
    eCommType_CNB
};
//@{
/*! \brief Flags used to coordinate PP-PME communication and computation phases
 *
 * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
 * that the first six flags are exactly in this order.
 */
#define PP_PME_CHARGE         (1<<0)
#define PP_PME_CHARGEB        (1<<1)
#define PP_PME_SQRTC6         (1<<2)
#define PP_PME_SQRTC6B        (1<<3)
#define PP_PME_SIGMA          (1<<4)
#define PP_PME_SIGMAB         (1<<5)
#define PP_PME_COORD          (1<<6)
#define PP_PME_ENER_VIR       (1<<9)
#define PP_PME_FINISH         (1<<10)
#define PP_PME_SWITCHGRID     (1<<11)
#define PP_PME_RESETCOUNTERS  (1<<12)

#define PME_PP_SIGSTOP        (1<<0)
#define PME_PP_SIGSTOPNSS     (1<<1)
//@}
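/* Note on the flag layout: each B-state flag is the corresponding A-state
 * flag shifted left by one bit, so the free-energy B-state transfers can be
 * enabled with a single shift, e.g.
 *     flags  = PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA;
 *     flags |= (flags << 1);  // adds PP_PME_CHARGEB, PP_PME_SQRTC6B, PP_PME_SIGMAB
 * as done in gmx_pme_send_parameters() below. gmx_pme_recv_coeffs_coords()
 * likewise relies on the first six flags matching the eCommType order when
 * it selects the receive buffer with (PP_PME_CHARGE << q).
 */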
/*! \brief Master PP-PME communication data structure */
struct gmx_pme_pp {
#if GMX_MPI
    MPI_Comm     mpi_comm_mysim; /**< MPI communicator for this simulation */
#endif
    int          nnode;          /**< The number of PP nodes to communicate with */
    int         *node;           /**< The PP node ranks */
    int          node_peer;      /**< The peer PP node rank */
    int         *nat;            /**< The number of atoms for each PP node */
    //@{
    /**< Vectors of A- and B-state parameters used to transfer vectors to PME ranks */
    real        *chargeA;
    real        *chargeB;
    real        *sqrt_c6A;
    real        *sqrt_c6B;
    real        *sigmaA;
    real        *sigmaB;
    //@}
    rvec        *x;              /**< Vector of atom coordinates to transfer to PME ranks */
    rvec        *f;              /**< Vector of atom forces received from PME ranks */
    int          nalloc;         /**< Allocation size of transfer vectors (>= \p nat) */
#if GMX_MPI
    //@{
    /**< Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
    MPI_Request *req;
    MPI_Status  *stat;
    //@}
#endif
};
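/* Note: this structure is used on the PME side of the PP-PME communication
 * (see gmx_pme_recv_coeffs_coords() and gmx_pme_send_force_vir_ener() below).
 * It is allocated by gmx_pme_pp_init() and its per-atom transfer buffers are
 * grown on demand in gmx_pme_recv_coeffs_coords() whenever the total atom
 * count received from the PP ranks exceeds nalloc.
 */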
/*! \brief Helper struct for PP-PME communication of parameters */
struct gmx_pme_comm_n_box_t {
    int             natoms;       /**< Number of atoms */
    matrix          box;          /**< Box */
    int             maxshift_x;   /**< Maximum shift in x direction */
    int             maxshift_y;   /**< Maximum shift in y direction */
    real            lambda_q;     /**< Free-energy lambda for electrostatics */
    real            lambda_lj;    /**< Free-energy lambda for Lennard-Jones */
    unsigned int    flags;        /**< Control flags */
    gmx_int64_t     step;         /**< MD integration step number */
    //@{
    /*! \brief Used in PME grid tuning */
    ivec            grid_size;
    real            ewaldcoeff_q;
    real            ewaldcoeff_lj;
    //@}
};
/*! \brief Helper struct for PP-PME communication of virial and energy */
typedef struct {
    //@{
    /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
    matrix          vir_q;
    matrix          vir_lj;
    real            energy_q;
    real            energy_lj;
    real            dvdlambda_q;
    real            dvdlambda_lj;
    //@}
    float           cycles;    /**< Counter of CPU cycles used */
    gmx_stop_cond_t stop_cond; /**< Flag used in responding to an external signal to terminate */
} gmx_pme_comm_vir_ene_t;
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
{
    struct gmx_pme_pp *pme_pp;

    snew(pme_pp, 1);

#if GMX_MPI
    int rank;

    pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
    get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
    snew(pme_pp->nat, pme_pp->nnode);
    snew(pme_pp->req, eCommType_NR*pme_pp->nnode);
    snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
    pme_pp->nalloc = 0;
#else
    GMX_UNUSED_VALUE(cr);
#endif

    return pme_pp;
}
/*! \brief Block to wait for communication to PME ranks to complete
 *
 * This should be faster with a real non-blocking MPI implementation */
/* #define GMX_PME_DELAYED_WAIT */

static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
{
#if GMX_MPI
    if (dd->nreq_pme)
    {
        MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
        dd->nreq_pme = 0;
    }
#endif
}
/*! \brief Send data to PME ranks */
static void gmx_pme_send_coeffs_coords(t_commrec *cr, unsigned int flags,
                                       real gmx_unused *chargeA, real gmx_unused *chargeB,
                                       real gmx_unused *c6A, real gmx_unused *c6B,
                                       real gmx_unused *sigmaA, real gmx_unused *sigmaB,
                                       matrix box, rvec gmx_unused *x,
                                       real lambda_q, real lambda_lj,
                                       int maxshift_x, int maxshift_y,
                                       gmx_int64_t step)
{
    gmx_domdec_t         *dd;
    gmx_pme_comm_n_box_t *cnb;
    int                   n;

    dd = cr->dd;
    n  = dd->nat_home;

    if (debug)
    {
        fprintf(debug, "PP rank %d sending to PME rank %d: %d%s%s%s%s\n",
                cr->sim_nodeid, dd->pme_nodeid, n,
                (flags & PP_PME_CHARGE) ? " charges" : "",
                (flags & PP_PME_SQRTC6) ? " sqrtC6" : "",
                (flags & PP_PME_SIGMA) ? " sigma" : "",
                (flags & PP_PME_COORD) ? " coordinates" : "");
    }

#ifdef GMX_PME_DELAYED_WAIT
    /* We can not use cnb until pending communication has finished */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif

    if (dd->pme_receive_vir_ener)
    {
        /* Peer PP node: communicate all data */
        if (dd->cnb == nullptr)
        {
            snew(dd->cnb, 1);
        }
        cnb = dd->cnb;

        cnb->flags      = flags;
        cnb->natoms     = n;
        cnb->maxshift_x = maxshift_x;
        cnb->maxshift_y = maxshift_y;
        cnb->lambda_q   = lambda_q;
        cnb->lambda_lj  = lambda_lj;
        cnb->step       = step;
        if (flags & PP_PME_COORD)
        {
            copy_mat(box, cnb->box);
        }
#if GMX_MPI
        MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }
    else if (flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
    {
#if GMX_MPI
        /* Communicate only the number of atoms */
        MPI_Isend(&n, sizeof(n), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }

#if GMX_MPI
    if (n > 0)
    {
        if (flags & PP_PME_CHARGE)
        {
            MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_CHARGEB)
        {
            MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_ChargeB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6)
        {
            MPI_Isend(c6A, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6A, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SQRTC6B)
        {
            MPI_Isend(c6B, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SQRTC6B, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMA)
        {
            MPI_Isend(sigmaA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaA, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_SIGMAB)
        {
            MPI_Isend(sigmaB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, eCommType_SigmaB, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_COORD)
        {
            MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
                      dd->pme_nodeid, eCommType_COORD, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
    }

#ifndef GMX_PME_DELAYED_WAIT
    /* Wait for the data to arrive */
    /* We can skip this wait as we are sure x and q will not be modified
     * before the next call to gmx_pme_send_x_q or gmx_pme_receive_f.
     */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif
#endif
}
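/* Send the per-atom electrostatics and LJ-PME parameters to our PME rank.
 * Charges are sent when PME electrostatics is active, sqrt(C6) and sigma
 * when LJ-PME is active, and the B-state arrays are sent in addition when
 * free-energy perturbation is used for either interaction type.
 */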
void gmx_pme_send_parameters(t_commrec *cr,
                             const interaction_const_t *ic,
                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                             real *chargeA, real *chargeB,
                             real *sqrt_c6A, real *sqrt_c6B,
                             real *sigmaA, real *sigmaB,
                             int maxshift_x, int maxshift_y)
{
    unsigned int flags = 0;

    if (EEL_PME(ic->eeltype))
    {
        flags |= PP_PME_CHARGE;
    }
    if (EVDW_PME(ic->vdwtype))
    {
        flags |= (PP_PME_SQRTC6 | PP_PME_SIGMA);
    }
    if (bFreeEnergy_q || bFreeEnergy_lj)
    {
        /* Assumes that the B state flags are in the bits just above
         * the ones for the A state. */
        flags |= (flags << 1);
    }

    gmx_pme_send_coeffs_coords(cr, flags,
                               chargeA, chargeB,
                               sqrt_c6A, sqrt_c6B, sigmaA, sigmaB,
                               nullptr, nullptr, 0, 0, maxshift_x, maxshift_y, -1);
}
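/* Send the coordinates, box and lambda values to our PME rank for this step.
 * When bEnerVir is set, the PME rank is also asked to return energies and
 * the virial for this step (PP_PME_ENER_VIR).
 */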
void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                              real lambda_q, real lambda_lj,
                              gmx_bool bEnerVir,
                              gmx_int64_t step)
{
    unsigned int flags = PP_PME_COORD;
    if (bEnerVir)
    {
        flags |= PP_PME_ENER_VIR;
    }
    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                               box, x, lambda_q, lambda_lj, 0, 0, step);
}
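/* Tell our PME rank that the run is finishing (sends PP_PME_FINISH, which
 * makes gmx_pme_recv_coeffs_coords() return pmerecvqxFINISH).
 */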
void gmx_pme_send_finish(t_commrec *cr)
{
    unsigned int flags = PP_PME_FINISH;

    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, -1);
}
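/* Tell our PME rank to switch to a new grid size and Ewald coefficients,
 * used during PME grid tuning. Only the peer PP rank (the one that also
 * receives virial and energy) sends this message.
 */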
void gmx_pme_send_switchgrid(t_commrec gmx_unused *cr,
                             ivec gmx_unused       grid_size,
                             real gmx_unused       ewaldcoeff_q,
                             real gmx_unused       ewaldcoeff_lj)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_SWITCHGRID;
        copy_ivec(grid_size, cnb.grid_size);
        cnb.ewaldcoeff_q  = ewaldcoeff_q;
        cnb.ewaldcoeff_lj = ewaldcoeff_lj;

        /* We send this uncommon message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
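/* Tell our PME rank to reset its cycle counters at the given step.
 * As with the grid switch, only the peer PP rank sends this message.
 */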
void gmx_pme_send_resetcounters(t_commrec gmx_unused *cr, gmx_int64_t gmx_unused step)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_RESETCOUNTERS;
        cnb.step  = step;

        /* We send this uncommon message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
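/* Receive the next message(s) from the PP ranks on a PME-only rank.
 *
 * Blocks until a complete set of data has arrived and returns one of the
 * pmerecvqx* codes: pmerecvqxX when coordinates (and possibly new
 * parameters) are ready for a PME step, pmerecvqxSWITCHGRID or
 * pmerecvqxRESETCOUNTERS for the special messages, and pmerecvqxFINISH
 * when the run is ending.
 *
 * Minimal sketch of how a PME-only rank can drive this (illustrative only;
 * the actual loop, including grid-switch and counter-reset handling, lives
 * in the PME-only main routine):
 *
 *     int ret;
 *     do
 *     {
 *         ret = gmx_pme_recv_coeffs_coords(pme_pp, &natoms, ...);
 *         if (ret == pmerecvqxX)
 *         {
 *             // spread/solve/gather, then return forces, energy and virial:
 *             // gmx_pme_send_force_vir_ener(pme_pp, f, vir_q, energy_q, ...);
 *         }
 *     }
 *     while (ret != pmerecvqxFINISH);
 */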
int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                               int               *natoms,
                               real             **chargeA,
                               real             **chargeB,
                               real             **sqrt_c6A,
                               real             **sqrt_c6B,
                               real             **sigmaA,
                               real             **sigmaB,
                               matrix             box,
                               rvec             **x,
                               rvec             **f,
                               int               *maxshift_x,
                               int               *maxshift_y,
                               real              *lambda_q,
                               real              *lambda_lj,
                               gmx_bool          *bEnerVir,
                               gmx_int64_t       *step,
                               ivec               grid_size,
                               real              *ewaldcoeff_q,
                               real              *ewaldcoeff_lj,
                               bool              *atomSetChanged)
{
    int status = -1;
    int nat    = 0;

#if GMX_MPI
    unsigned int flags    = 0;
    int          messages = 0;

    do
    {
        gmx_pme_comm_n_box_t cnb;
        cnb.flags = 0;

        /* Receive the send count, box and time step from the peer PP node */
        MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                 pme_pp->node_peer, eCommType_CNB,
                 pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);

        /* We accumulate all received flags */
        flags |= cnb.flags;

        *step = cnb.step;

        if (debug)
        {
            fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
                    (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
        }

        if (cnb.flags & PP_PME_FINISH)
        {
            status = pmerecvqxFINISH;
        }

        if (cnb.flags & PP_PME_SWITCHGRID)
        {
            /* Special case, receive the new parameters and return */
            copy_ivec(cnb.grid_size, grid_size);
            *ewaldcoeff_q  = cnb.ewaldcoeff_q;
            *ewaldcoeff_lj = cnb.ewaldcoeff_lj;

            status = pmerecvqxSWITCHGRID;
        }

        if (cnb.flags & PP_PME_RESETCOUNTERS)
        {
            /* Special case, receive the step (set above) and return */
            status = pmerecvqxRESETCOUNTERS;
        }

        if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
        {
            *atomSetChanged = true;

            /* Receive the send counts from the other PP nodes */
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->node[sender] == pme_pp->node_peer)
                {
                    pme_pp->nat[sender] = cnb.natoms;
                }
                else
                {
                    MPI_Irecv(&(pme_pp->nat[sender]), sizeof(pme_pp->nat[0]),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_CNB,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                }
            }
            MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
            messages = 0;

            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                nat += pme_pp->nat[sender];
            }

            if (nat > pme_pp->nalloc)
            {
                pme_pp->nalloc = over_alloc_dd(nat);
                if (cnb.flags & PP_PME_CHARGE)
                {
                    srenew(pme_pp->chargeA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_CHARGEB)
                {
                    srenew(pme_pp->chargeB, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6)
                {
                    srenew(pme_pp->sqrt_c6A, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6B)
                {
                    srenew(pme_pp->sqrt_c6B, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMA)
                {
                    srenew(pme_pp->sigmaA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMAB)
                {
                    srenew(pme_pp->sigmaB, pme_pp->nalloc);
                }
                srenew(pme_pp->x, pme_pp->nalloc);
                srenew(pme_pp->f, pme_pp->nalloc);
            }

            /* maxshift is sent when the charges are sent */
            *maxshift_x = cnb.maxshift_x;
            *maxshift_y = cnb.maxshift_y;

            /* Receive the charges in place */
            for (int q = 0; q < eCommType_NR; q++)
            {
                real *charge_pp;

                if (!(cnb.flags & (PP_PME_CHARGE<<q)))
                {
                    continue;
                }
                switch (q)
                {
                    case eCommType_ChargeA: charge_pp = pme_pp->chargeA;  break;
                    case eCommType_ChargeB: charge_pp = pme_pp->chargeB;  break;
                    case eCommType_SQRTC6A: charge_pp = pme_pp->sqrt_c6A; break;
                    case eCommType_SQRTC6B: charge_pp = pme_pp->sqrt_c6B; break;
                    case eCommType_SigmaA:  charge_pp = pme_pp->sigmaA;   break;
                    case eCommType_SigmaB:  charge_pp = pme_pp->sigmaB;   break;
                    default: gmx_incons("Wrong eCommType");
                }
                nat = 0;
                for (int sender = 0; sender < pme_pp->nnode; sender++)
                {
                    if (pme_pp->nat[sender] > 0)
                    {
                        MPI_Irecv(charge_pp+nat,
                                  pme_pp->nat[sender]*sizeof(real),
                                  MPI_BYTE,
                                  pme_pp->node[sender], q,
                                  pme_pp->mpi_comm_mysim,
                                  &pme_pp->req[messages++]);
                        nat += pme_pp->nat[sender];
                        if (debug)
                        {
                            fprintf(debug, "Received from PP rank %d: %d %s\n",
                                    pme_pp->node[sender], pme_pp->nat[sender],
                                    (q == eCommType_ChargeA ||
                                     q == eCommType_ChargeB) ? "charges" : "params");
                        }
                    }
                }
            }
        }

        if (cnb.flags & PP_PME_COORD)
        {
            /* The box, FE flag and lambda are sent along with the coordinates */
            copy_mat(cnb.box, box);
            *lambda_q  = cnb.lambda_q;
            *lambda_lj = cnb.lambda_lj;
            *bEnerVir  = (cnb.flags & PP_PME_ENER_VIR);
            *step      = cnb.step;

            /* Receive the coordinates in place */
            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->nat[sender] > 0)
                {
                    MPI_Irecv(pme_pp->x[nat], pme_pp->nat[sender]*sizeof(rvec),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_COORD,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                    nat += pme_pp->nat[sender];
                    if (debug)
                    {
                        fprintf(debug, "Received from PP rank %d: %d "
                                "coordinates\n",
                                pme_pp->node[sender], pme_pp->nat[sender]);
                    }
                }
            }

            status = pmerecvqxX;
        }

        /* Wait for the coordinates and/or charges to arrive */
        MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
        messages = 0;
    }
    while (status == -1);
#else
    GMX_UNUSED_VALUE(box);
    GMX_UNUSED_VALUE(maxshift_x);
    GMX_UNUSED_VALUE(maxshift_y);
    GMX_UNUSED_VALUE(lambda_q);
    GMX_UNUSED_VALUE(lambda_lj);
    GMX_UNUSED_VALUE(bEnerVir);
    GMX_UNUSED_VALUE(step);
    GMX_UNUSED_VALUE(grid_size);
    GMX_UNUSED_VALUE(ewaldcoeff_q);
    GMX_UNUSED_VALUE(ewaldcoeff_lj);
    GMX_UNUSED_VALUE(atomSetChanged);

    status = pmerecvqxX;
#endif

    if (status == pmerecvqxX)
    {
        *natoms   = nat;
        *chargeA  = pme_pp->chargeA;
        *chargeB  = pme_pp->chargeB;
        *sqrt_c6A = pme_pp->sqrt_c6A;
        *sqrt_c6B = pme_pp->sqrt_c6B;
        *sigmaA   = pme_pp->sigmaA;
        *sigmaB   = pme_pp->sigmaB;
        *x        = pme_pp->x;
        *f        = pme_pp->f;
    }

    return status;
}
/*! \brief Receive virial and energy from PME rank */
static void receive_virial_energy(t_commrec *cr,
                                  matrix vir_q, real *energy_q,
                                  matrix vir_lj, real *energy_lj,
                                  real *dvdlambda_q, real *dvdlambda_lj,
                                  float *pme_cycles)
{
    gmx_pme_comm_vir_ene_t cve;

    if (cr->dd->pme_receive_vir_ener)
    {
        if (debug)
        {
            fprintf(debug,
                    "PP rank %d receiving from PME rank %d: virial and energy\n",
                    cr->sim_nodeid, cr->dd->pme_nodeid);
        }
#if GMX_MPI
        MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                 MPI_STATUS_IGNORE);
#else
        memset(&cve, 0, sizeof(cve));
#endif

        m_add(vir_q, cve.vir_q, vir_q);
        m_add(vir_lj, cve.vir_lj, vir_lj);
        *energy_q      = cve.energy_q;
        *energy_lj     = cve.energy_lj;
        *dvdlambda_q  += cve.dvdlambda_q;
        *dvdlambda_lj += cve.dvdlambda_lj;
        *pme_cycles    = cve.cycles;

        if (cve.stop_cond != gmx_stop_cond_none)
        {
            gmx_set_stop_condition(cve.stop_cond);
        }
    }
    else
    {
        *energy_q   = 0;
        *energy_lj  = 0;
        *pme_cycles = 0;
    }
}
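/* Receive the PME forces on a PP rank and add them to f. When this PP rank
 * is the peer of its PME rank, the virial, energies, dV/dlambda terms and
 * the PME cycle count are received as well via receive_virial_energy().
 */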
void gmx_pme_receive_f(t_commrec *cr,
                       rvec f[], matrix vir_q, real *energy_q,
                       matrix vir_lj, real *energy_lj,
                       real *dvdlambda_q, real *dvdlambda_lj,
                       float *pme_cycles)
{
#ifdef GMX_PME_DELAYED_WAIT
    /* Wait for the x request to finish */
    gmx_pme_send_coeffs_coords_wait(cr->dd);
#endif

    int natoms = cr->dd->nat_home;

    if (natoms > cr->dd->pme_recv_f_alloc)
    {
        cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
        srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
    }

#if GMX_MPI
    MPI_Recv(cr->dd->pme_recv_f_buf[0],
             natoms*sizeof(rvec), MPI_BYTE,
             cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
             MPI_STATUS_IGNORE);
#endif

    int nt = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, natoms);

    /* Note that we would like to avoid this conditional by putting it
     * into the omp pragma instead, but then we still take the full
     * omp parallel for overhead (at least with gcc5).
     */
    if (nt == 1)
    {
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }
    else
    {
#pragma omp parallel for num_threads(nt) schedule(static)
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }

    receive_virial_energy(cr, vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, pme_cycles);
}
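/* On a PME-only rank: send the computed forces back to all attached PP
 * ranks, and the virial, energies, dV/dlambda terms, cycle count and any
 * stop signal to the peer PP rank.
 */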
void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                 rvec gmx_unused *f,
                                 matrix vir_q, real energy_q,
                                 matrix vir_lj, real energy_lj,
                                 real dvdlambda_q, real dvdlambda_lj,
                                 float cycles)
{
#if GMX_MPI
    gmx_pme_comm_vir_ene_t cve;
    int                    messages, ind_start, ind_end;
    cve.cycles = cycles;

    /* Now the evaluated forces have to be transferred to the PP nodes */
    messages = 0;
    ind_end  = 0;
    for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
    {
        ind_start = ind_end;
        ind_end   = ind_start + pme_pp->nat[receiver];
        if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                      pme_pp->node[receiver], 0,
                      pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
        {
            gmx_comm("MPI_Isend failed in do_pmeonly");
        }
    }

    /* send virial and energy to our last PP node */
    copy_mat(vir_q, cve.vir_q);
    copy_mat(vir_lj, cve.vir_lj);
    cve.energy_q     = energy_q;
    cve.energy_lj    = energy_lj;
    cve.dvdlambda_q  = dvdlambda_q;
    cve.dvdlambda_lj = dvdlambda_lj;
    /* check for the signals to send back to a PP node */
    cve.stop_cond = gmx_get_stop_condition();

    cve.cycles = cycles;

    if (debug)
    {
        fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
                pme_pp->node_peer);
    }
    MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
              pme_pp->node_peer, 1,
              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);

    /* Wait for the forces to arrive */
    MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
#else
    gmx_call("MPI not enabled");
    GMX_UNUSED_VALUE(pme_pp);
    GMX_UNUSED_VALUE(f);
    GMX_UNUSED_VALUE(vir_q);
    GMX_UNUSED_VALUE(energy_q);
    GMX_UNUSED_VALUE(vir_lj);
    GMX_UNUSED_VALUE(energy_lj);
    GMX_UNUSED_VALUE(dvdlambda_q);
    GMX_UNUSED_VALUE(dvdlambda_lj);
    GMX_UNUSED_VALUE(cycles);
#endif
}