/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions necessary for
 * managing the offload of long-ranged PME work to a separate MPI rank,
 * for computing energies and forces (Coulomb and LJ).
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
#include "gmxpre.h"

#include "config.h"

#include <stdio.h>
#include <string.h>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/sighandler.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
/*! \brief MPI Tags used to separate communication of different types of quantities */
enum {
    eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
    eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
    eCommType_CNB
};
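/* The first six tags above correspond one-to-one to the first six PP_PME_*
 * flags below: the flag bit for the quantity with tag q is (PP_PME_CHARGE<<q),
 * which is exactly how the receive loop in gmx_pme_recv_coeffs_coords() decides
 * which per-quantity messages to post receives for. eCommType_CNB tags the
 * header message carrying the gmx_pme_comm_n_box_t struct, and eCommType_COORD
 * tags the coordinate payload.
 */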
/*! \brief Flags used to coordinate PP-PME communication and computation phases
 *
 * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
 * that the first six flags are exactly in this order.
 */
#define PP_PME_CHARGE         (1<<0)
#define PP_PME_CHARGEB        (1<<1)
#define PP_PME_SQRTC6         (1<<2)
#define PP_PME_SQRTC6B        (1<<3)
#define PP_PME_SIGMA          (1<<4)
#define PP_PME_SIGMAB         (1<<5)
#define PP_PME_COORD          (1<<6)
#define PP_PME_ENER_VIR       (1<<9)
#define PP_PME_FINISH         (1<<10)
#define PP_PME_SWITCHGRID     (1<<11)
#define PP_PME_RESETCOUNTERS  (1<<12)

#define PME_PP_SIGSTOP        (1<<0)
#define PME_PP_SIGSTOPNSS     (1<<1)
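/* Note on the flag layout: each B-state flag sits exactly one bit above its
 * A-state counterpart (CHARGE/CHARGEB, SQRTC6/SQRTC6B, SIGMA/SIGMAB). This is
 * what lets gmx_pme_send_parameters() enable the B-state transfers for
 * free-energy runs with a single shift:
 *
 *     flags  = PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA;
 *     flags |= (flags << 1);  // also sets PP_PME_CHARGEB | PP_PME_SQRTC6B | PP_PME_SIGMAB
 */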
/*! \brief Master PP-PME communication data structure */
struct gmx_pme_pp {
    MPI_Comm     mpi_comm_mysim; /**< MPI communicator for this simulation */
    int          nnode;          /**< The number of PP nodes to communicate with */
    int         *node;           /**< The PP node ranks */
    int          node_peer;      /**< The peer PP node rank */
    int         *nat;            /**< The number of atoms for each PP node */

    /* Vectors of A- and B-state parameters used to transfer vectors to PME ranks */
    real        *chargeA;        /**< A-state charges */
    real        *chargeB;        /**< B-state charges */
    real        *sqrt_c6A;       /**< A-state sqrt(C6) LJ parameters */
    real        *sqrt_c6B;       /**< B-state sqrt(C6) LJ parameters */
    real        *sigmaA;         /**< A-state LJ sigmas */
    real        *sigmaB;         /**< B-state LJ sigmas */

    rvec        *x;              /**< Vector of atom coordinates to transfer to PME ranks */
    rvec        *f;              /**< Vector of atom forces received from PME ranks */
    int          nalloc;         /**< Allocation size of transfer vectors (>= \p nat) */

    /* Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
    MPI_Request *req;            /**< MPI requests, one per in-flight message */
    MPI_Status  *stat;           /**< MPI statuses matching \p req */
};
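/* The receive buffers above (chargeA..sigmaB, x, f) are (re)allocated lazily in
 * gmx_pme_recv_coeffs_coords(): whenever the total atom count over all PP
 * senders exceeds nalloc, nalloc is bumped with over_alloc_dd() and only the
 * vectors actually requested by the flags in the header are grown with srenew().
 */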
/*! \brief Helper struct for PP-PME communication of parameters */
struct gmx_pme_comm_n_box_t
{
    int             natoms;        /**< Number of atoms */
    matrix          box;           /**< Box */
    int             maxshift_x;    /**< Maximum shift in x direction */
    int             maxshift_y;    /**< Maximum shift in y direction */
    real            lambda_q;      /**< Free-energy lambda for electrostatics */
    real            lambda_lj;     /**< Free-energy lambda for Lennard-Jones */
    unsigned int    flags;         /**< Control flags */
    gmx_int64_t     step;          /**< MD integration step number */

    /*! \brief Used in PME grid tuning */
    //@{
    ivec            grid_size;     /**< New PME grid size */
    real            ewaldcoeff_q;  /**< New Ewald coefficient for Coulomb */
    real            ewaldcoeff_lj; /**< New Ewald coefficient for Lennard-Jones */
    //@}
};
/*! \brief Helper struct for PP-PME communication of virial and energy */
typedef struct
{
    //@{
    /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
    matrix          vir_q;
    matrix          vir_lj;
    real            energy_q;
    real            energy_lj;
    real            dvdlambda_q;
    real            dvdlambda_lj;
    //@}
    float           cycles;      /**< Counter of CPU cycles used */
    gmx_stop_cond_t stop_cond;   /**< Flag used in responding to an external signal to terminate */
} gmx_pme_comm_vir_ene_t;
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
{
    struct gmx_pme_pp *pme_pp;

    snew(pme_pp, 1);

#if GMX_MPI
    int rank;

    pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
    get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
    snew(pme_pp->nat, pme_pp->nnode);
    snew(pme_pp->req, eCommType_NR*pme_pp->nnode);
    snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
#else
    GMX_UNUSED_VALUE(cr);
#endif

    return pme_pp;
}
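/* req/stat are sized eCommType_NR*nnode: within one receive cycle the PME rank
 * posts at most one non-blocking receive per PP sender for each of the (at most
 * eCommType_NR) quantity types announced by a single header message, and the
 * request array is drained with MPI_Waitall() before the next batch is posted.
 */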
/*! \brief Block to wait for communication to PME ranks to complete
 *
 * This should be faster with a real non-blocking MPI implementation
 */
/* #define GMX_PME_DELAYED_WAIT */

static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
{
#if GMX_MPI
    if (dd->nreq_pme)
    {
        MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
        dd->nreq_pme = 0;
    }
#endif
}
/*! \brief Send data to PME ranks */
static void gmx_pme_send_coeffs_coords(t_commrec *cr, unsigned int flags,
                                       real gmx_unused *chargeA, real gmx_unused *chargeB,
                                       real gmx_unused *c6A, real gmx_unused *c6B,
                                       real gmx_unused *sigmaA, real gmx_unused *sigmaB,
                                       matrix box, rvec gmx_unused *x,
                                       real lambda_q, real lambda_lj,
                                       int maxshift_x, int maxshift_y,
                                       gmx_int64_t step)
{
    gmx_domdec_t         *dd;
    gmx_pme_comm_n_box_t *cnb;
    int                   n;

    dd = cr->dd;
    /* Number of home atoms on this PP rank */
    n  = dd->nat_home;

    if (debug)
    {
        fprintf(debug, "PP rank %d sending to PME rank %d: %d%s%s%s%s\n",
                cr->sim_nodeid, dd->pme_nodeid, n,
                (flags & PP_PME_CHARGE) ? " charges" : "",
                (flags & PP_PME_SQRTC6) ? " sqrtC6" : "",
                (flags & PP_PME_SIGMA)  ? " sigma" : "",
                (flags & PP_PME_COORD)  ? " coordinates" : "");
    }
#ifdef GMX_PME_DELAYED_WAIT
    /* We can not use cnb until pending communication has finished */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif

    if (dd->pme_receive_vir_ener)
    {
        /* Peer PP node: communicate all data */
        if (dd->cnb == nullptr)
        {
            snew(dd->cnb, 1);
        }
        cnb = dd->cnb;

        cnb->flags      = flags;
        cnb->natoms     = n;
        cnb->maxshift_x = maxshift_x;
        cnb->maxshift_y = maxshift_y;
        cnb->lambda_q   = lambda_q;
        cnb->lambda_lj  = lambda_lj;
        cnb->step       = step;
        if (flags & PP_PME_COORD)
        {
            copy_mat(box, cnb->box);
        }
        MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    else if (flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
    {
        /* Communicate only the number of atoms */
        MPI_Isend(&n, sizeof(n), MPI_BYTE,
                  dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_CHARGE)
    {
        MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_ChargeA, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_CHARGEB)
    {
        MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_ChargeB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_SQRTC6)
    {
        MPI_Isend(c6A, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_SQRTC6A, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_SQRTC6B)
    {
        MPI_Isend(c6B, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_SQRTC6B, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_SIGMA)
    {
        MPI_Isend(sigmaA, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_SigmaA, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_SIGMAB)
    {
        MPI_Isend(sigmaB, n*sizeof(real), MPI_BYTE,
                  dd->pme_nodeid, eCommType_SigmaB, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
    if (flags & PP_PME_COORD)
    {
        MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
                  dd->pme_nodeid, eCommType_COORD, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
    }
#ifndef GMX_PME_DELAYED_WAIT
    /* Wait for the data to arrive */
    /* We can skip this wait as we are sure x and q will not be modified
     * before the next call to gmx_pme_send_x_q or gmx_pme_receive_f.
     */
    gmx_pme_send_coeffs_coords_wait(dd);
#endif
}
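/* Wire protocol of a send, as implemented above: the peer PP rank (the one
 * with dd->pme_receive_vir_ener set) first posts the gmx_pme_comm_n_box_t
 * header with tag eCommType_CNB; every other PP rank served by the same PME
 * rank posts only its atom count with that tag. The per-quantity payloads
 * (charges, sqrt(C6), sigma, coordinates) then follow as separate non-blocking
 * sends, each with its own eCommType_* tag, and are matched on the PME side in
 * gmx_pme_recv_coeffs_coords().
 */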
void gmx_pme_send_parameters(t_commrec *cr,
                             const interaction_const_t *ic,
                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                             real *chargeA, real *chargeB,
                             real *sqrt_c6A, real *sqrt_c6B,
                             real *sigmaA, real *sigmaB,
                             int maxshift_x, int maxshift_y)
{
    unsigned int flags = 0;

    if (EEL_PME(ic->eeltype))
    {
        flags |= PP_PME_CHARGE;
    }
    if (EVDW_PME(ic->vdwtype))
    {
        flags |= (PP_PME_SQRTC6 | PP_PME_SIGMA);
    }
    if (bFreeEnergy_q || bFreeEnergy_lj)
    {
        /* Assumes that the B state flags are in the bits just above
         * the ones for the A state. */
        flags |= (flags << 1);
    }

    gmx_pme_send_coeffs_coords(cr, flags,
                               chargeA, chargeB,
                               sqrt_c6A, sqrt_c6B, sigmaA, sigmaB,
                               nullptr, nullptr, 0, 0, maxshift_x, maxshift_y, -1);
}
void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                              real lambda_q, real lambda_lj,
                              gmx_bool bEnerVir,
                              gmx_int64_t step)
{
    unsigned int flags = PP_PME_COORD;

    if (bEnerVir)
    {
        flags |= PP_PME_ENER_VIR;
    }
    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                               box, x, lambda_q, lambda_lj, 0, 0, step);
}
void gmx_pme_send_finish(t_commrec *cr)
{
    unsigned int flags = PP_PME_FINISH;

    gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, -1);
}
void gmx_pme_send_switchgrid(t_commrec gmx_unused *cr,
                             ivec gmx_unused       grid_size,
                             real gmx_unused       ewaldcoeff_q,
                             real gmx_unused       ewaldcoeff_lj)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_SWITCHGRID;
        copy_ivec(grid_size, cnb.grid_size);
        cnb.ewaldcoeff_q  = ewaldcoeff_q;
        cnb.ewaldcoeff_lj = ewaldcoeff_lj;

        /* We send this, uncommon, message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
void gmx_pme_send_resetcounters(t_commrec gmx_unused *cr, gmx_int64_t gmx_unused step)
{
#if GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_RESETCOUNTERS;
        cnb.step  = step;

        /* We send this, uncommon, message blocking to simplify the code */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
    }
#endif
}
int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                               int               *natoms,
                               real             **chargeA,
                               real             **chargeB,
                               real             **sqrt_c6A,
                               real             **sqrt_c6B,
                               real             **sigmaA,
                               real             **sigmaB,
                               matrix             box,
                               rvec             **x,
                               rvec             **f,
                               int               *maxshift_x,
                               int               *maxshift_y,
                               real              *lambda_q,
                               real              *lambda_lj,
                               gmx_bool          *bEnerVir,
                               gmx_int64_t       *step,
                               ivec               grid_size,
                               real              *ewaldcoeff_q,
                               real              *ewaldcoeff_lj,
                               bool              *atomSetChanged)
{
    int status = -1;
    int nat    = 0;

#if GMX_MPI
    unsigned int flags    = 0;
    int          messages = 0;

    do
    {
        gmx_pme_comm_n_box_t cnb;

        /* Receive the send count, box and time step from the peer PP node */
        MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                 pme_pp->node_peer, eCommType_CNB,
                 pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);

        /* We accumulate all received flags */
        flags |= cnb.flags;

        *step = cnb.step;

        if (debug)
        {
            fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
                    (cnb.flags & PP_PME_COORD)         ? " coordinates" : "",
                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
        }
        if (cnb.flags & PP_PME_FINISH)
        {
            status = pmerecvqxFINISH;
        }

        if (cnb.flags & PP_PME_SWITCHGRID)
        {
            /* Special case, receive the new parameters and return */
            copy_ivec(cnb.grid_size, grid_size);
            *ewaldcoeff_q  = cnb.ewaldcoeff_q;
            *ewaldcoeff_lj = cnb.ewaldcoeff_lj;

            status = pmerecvqxSWITCHGRID;
        }

        if (cnb.flags & PP_PME_RESETCOUNTERS)
        {
            /* Special case, receive the step (set above) and return */
            status = pmerecvqxRESETCOUNTERS;
        }

        if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
        {
            /* New parameters were sent, so the atom set (may) have changed */
            *atomSetChanged = true;
            /* Receive the send counts from the other PP nodes */
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->node[sender] == pme_pp->node_peer)
                {
                    pme_pp->nat[sender] = cnb.natoms;
                }
                else
                {
                    MPI_Irecv(&(pme_pp->nat[sender]), sizeof(pme_pp->nat[0]),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_CNB,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                }
            }
            MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
            messages = 0;

            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                nat += pme_pp->nat[sender];
            }

            if (nat > pme_pp->nalloc)
            {
                pme_pp->nalloc = over_alloc_dd(nat);
                if (cnb.flags & PP_PME_CHARGE)
                {
                    srenew(pme_pp->chargeA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_CHARGEB)
                {
                    srenew(pme_pp->chargeB, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6)
                {
                    srenew(pme_pp->sqrt_c6A, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SQRTC6B)
                {
                    srenew(pme_pp->sqrt_c6B, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMA)
                {
                    srenew(pme_pp->sigmaA, pme_pp->nalloc);
                }
                if (cnb.flags & PP_PME_SIGMAB)
                {
                    srenew(pme_pp->sigmaB, pme_pp->nalloc);
                }
                srenew(pme_pp->x, pme_pp->nalloc);
                srenew(pme_pp->f, pme_pp->nalloc);
            }

            /* maxshift is sent when the charges are sent */
            *maxshift_x = cnb.maxshift_x;
            *maxshift_y = cnb.maxshift_y;
            /* Receive the charges in place */
            for (int q = 0; q < eCommType_NR; q++)
            {
                real *charge_pp;

                if (!(cnb.flags & (PP_PME_CHARGE<<q)))
                {
                    continue;
                }
                switch (q)
                {
                    case eCommType_ChargeA: charge_pp = pme_pp->chargeA;  break;
                    case eCommType_ChargeB: charge_pp = pme_pp->chargeB;  break;
                    case eCommType_SQRTC6A: charge_pp = pme_pp->sqrt_c6A; break;
                    case eCommType_SQRTC6B: charge_pp = pme_pp->sqrt_c6B; break;
                    case eCommType_SigmaA:  charge_pp = pme_pp->sigmaA;   break;
                    case eCommType_SigmaB:  charge_pp = pme_pp->sigmaB;   break;
                    default: gmx_incons("Wrong eCommType");
                }
                nat = 0;
                for (int sender = 0; sender < pme_pp->nnode; sender++)
                {
                    if (pme_pp->nat[sender] > 0)
                    {
                        MPI_Irecv(charge_pp+nat,
                                  pme_pp->nat[sender]*sizeof(real),
                                  MPI_BYTE,
                                  pme_pp->node[sender], q,
                                  pme_pp->mpi_comm_mysim,
                                  &pme_pp->req[messages++]);
                        nat += pme_pp->nat[sender];
                        if (debug)
                        {
                            fprintf(debug, "Received from PP rank %d: %d %s\n",
                                    pme_pp->node[sender], pme_pp->nat[sender],
                                    (q == eCommType_ChargeA ||
                                     q == eCommType_ChargeB) ? "charges" : "params");
                        }
                    }
                }
            }
        }
        if (cnb.flags & PP_PME_COORD)
        {
            /* The box, FE flag and lambda are sent along with the coordinates */
            copy_mat(cnb.box, box);
            *lambda_q  = cnb.lambda_q;
            *lambda_lj = cnb.lambda_lj;
            *bEnerVir  = (cnb.flags & PP_PME_ENER_VIR);

            /* Receive the coordinates in place */
            nat = 0;
            for (int sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->nat[sender] > 0)
                {
                    MPI_Irecv(pme_pp->x[nat], pme_pp->nat[sender]*sizeof(rvec),
                              MPI_BYTE,
                              pme_pp->node[sender], eCommType_COORD,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                    nat += pme_pp->nat[sender];
                    if (debug)
                    {
                        fprintf(debug, "Received from PP rank %d: %d "
                                "coordinates\n",
                                pme_pp->node[sender], pme_pp->nat[sender]);
                    }
                }
            }

            status = pmerecvqxX;
        }

        /* Wait for the coordinates and/or charges to arrive */
        MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
        messages = 0;
    }
    while (status == -1);
#else
    GMX_UNUSED_VALUE(box);
    GMX_UNUSED_VALUE(maxshift_x);
    GMX_UNUSED_VALUE(maxshift_y);
    GMX_UNUSED_VALUE(lambda_q);
    GMX_UNUSED_VALUE(lambda_lj);
    GMX_UNUSED_VALUE(bEnerVir);
    GMX_UNUSED_VALUE(step);
    GMX_UNUSED_VALUE(grid_size);
    GMX_UNUSED_VALUE(ewaldcoeff_q);
    GMX_UNUSED_VALUE(ewaldcoeff_lj);
    GMX_UNUSED_VALUE(atomSetChanged);

    status = pmerecvqxX;
#endif
    if (status == pmerecvqxX)
    {
        *natoms   = nat;
        *chargeA  = pme_pp->chargeA;
        *chargeB  = pme_pp->chargeB;
        *sqrt_c6A = pme_pp->sqrt_c6A;
        *sqrt_c6B = pme_pp->sqrt_c6B;
        *sigmaA   = pme_pp->sigmaA;
        *sigmaB   = pme_pp->sigmaB;
        *x        = pme_pp->x;
        *f        = pme_pp->f;
    }

    return status;
}
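/* Return protocol of gmx_pme_recv_coeffs_coords(): the PME-only rank spins in
 * the do/while loop above, handling one header message per iteration, until a
 * header requests actual work or termination. It returns pmerecvqxX when
 * coordinates (and possibly fresh parameters) have been received, or
 * pmerecvqxSWITCHGRID, pmerecvqxRESETCOUNTERS or pmerecvqxFINISH for the
 * special messages, so the PME-only main loop can dispatch on the status code.
 */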
/*! \brief Receive virial and energy from PME rank */
static void receive_virial_energy(t_commrec *cr,
                                  matrix vir_q, real *energy_q,
                                  matrix vir_lj, real *energy_lj,
                                  real *dvdlambda_q, real *dvdlambda_lj,
                                  float *pme_cycles)
{
    gmx_pme_comm_vir_ene_t cve;

    if (cr->dd->pme_receive_vir_ener)
    {
        if (debug)
        {
            fprintf(debug,
                    "PP rank %d receiving from PME rank %d: virial and energy\n",
                    cr->sim_nodeid, cr->dd->pme_nodeid);
        }
#if GMX_MPI
        MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                 MPI_STATUS_IGNORE);
#else
        memset(&cve, 0, sizeof(cve));
#endif

        m_add(vir_q, cve.vir_q, vir_q);
        m_add(vir_lj, cve.vir_lj, vir_lj);
        *energy_q      = cve.energy_q;
        *energy_lj     = cve.energy_lj;
        *dvdlambda_q  += cve.dvdlambda_q;
        *dvdlambda_lj += cve.dvdlambda_lj;
        *pme_cycles    = cve.cycles;

        if (cve.stop_cond != gmx_stop_cond_none)
        {
            gmx_set_stop_condition(cve.stop_cond);
        }
    }
    else
    {
        /* Only the peer PP rank receives the virial and energy from the PME
         * rank; the other PP ranks report zero contributions. */
        *energy_q   = 0;
        *energy_lj  = 0;
        *pme_cycles = 0;
    }
}
void gmx_pme_receive_f(t_commrec *cr,
                       rvec f[], matrix vir_q, real *energy_q,
                       matrix vir_lj, real *energy_lj,
                       real *dvdlambda_q, real *dvdlambda_lj,
                       float *pme_cycles)
{
#ifdef GMX_PME_DELAYED_WAIT
    /* Wait for the x request to finish */
    gmx_pme_send_coeffs_coords_wait(cr->dd);
#endif

    int natoms = cr->dd->nat_home;

    if (natoms > cr->dd->pme_recv_f_alloc)
    {
        cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
        srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
    }

#if GMX_MPI
    MPI_Recv(cr->dd->pme_recv_f_buf[0],
             natoms*sizeof(rvec), MPI_BYTE,
             cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
             MPI_STATUS_IGNORE);
#endif

    int nt = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, natoms);

    /* Note that we would like to avoid this conditional by putting it
     * into the omp pragma instead, but then we still take the full
     * omp parallel for overhead (at least with gcc5).
     */
    if (nt == 1)
    {
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }
    else
    {
#pragma omp parallel for num_threads(nt) schedule(static)
        for (int i = 0; i < natoms; i++)
        {
            rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
        }
    }

    receive_virial_energy(cr, vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, pme_cycles);
}
void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                 rvec *f,
                                 matrix vir_q, real energy_q,
                                 matrix vir_lj, real energy_lj,
                                 real dvdlambda_q, real dvdlambda_lj,
                                 float cycles)
{
#if GMX_MPI
    gmx_pme_comm_vir_ene_t cve;
    int                    messages, ind_start, ind_end;

    /* Now the evaluated forces have to be transferred to the PP nodes */
    messages = 0;
    ind_end  = 0;
    for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
    {
        ind_start = ind_end;
        ind_end   = ind_start + pme_pp->nat[receiver];
        if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                      pme_pp->node[receiver], 0,
                      pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
        {
            gmx_comm("MPI_Isend failed in do_pmeonly");
        }
    }

    /* send virial and energy to our last PP node */
    copy_mat(vir_q, cve.vir_q);
    copy_mat(vir_lj, cve.vir_lj);
    cve.energy_q     = energy_q;
    cve.energy_lj    = energy_lj;
    cve.dvdlambda_q  = dvdlambda_q;
    cve.dvdlambda_lj = dvdlambda_lj;
    /* check for the signals to send back to a PP node */
    cve.stop_cond    = gmx_get_stop_condition();

    cve.cycles = cycles;

    if (debug)
    {
        fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
                pme_pp->node_peer);
    }
    MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
              pme_pp->node_peer, 1,
              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);

    /* Wait for the forces to arrive */
    MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
#else
    gmx_call("MPI not enabled");
    GMX_UNUSED_VALUE(pme_pp);
    GMX_UNUSED_VALUE(f);
    GMX_UNUSED_VALUE(vir_q);
    GMX_UNUSED_VALUE(energy_q);
    GMX_UNUSED_VALUE(vir_lj);
    GMX_UNUSED_VALUE(energy_lj);
    GMX_UNUSED_VALUE(dvdlambda_q);
    GMX_UNUSED_VALUE(dvdlambda_lj);
    GMX_UNUSED_VALUE(cycles);
#endif
}
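/* Send-back protocol, mirroring the receive side in gmx_pme_receive_f() and
 * receive_virial_energy(): the PME rank returns one force block per PP rank
 * with MPI tag 0, and a single gmx_pme_comm_vir_ene_t (virial, energies,
 * dV/dlambda, cycle count and any stop signal) to the peer PP rank with MPI
 * tag 1, which is the tag receive_virial_energy() matches in its MPI_Recv.
 */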