Generalize IForceProvider
[gromacs.git] / src / gromacs / mdlib / sim_util.cpp
blob 44ff5d475a22fa6c70ab11f07635a00d20d00f71
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "sim_util.h"
41 #include "config.h"
43 #include <assert.h>
44 #include <math.h>
45 #include <stdio.h>
46 #include <string.h>
48 #include <cstdint>
50 #include <array>
52 #include "gromacs/domdec/domdec.h"
53 #include "gromacs/domdec/domdec_struct.h"
54 #include "gromacs/essentialdynamics/edsam.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/gmxlib/chargegroup.h"
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/gmxlib/nrnb.h"
59 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
60 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
61 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
62 #include "gromacs/imd/imd.h"
63 #include "gromacs/listed-forces/bonded.h"
64 #include "gromacs/listed-forces/disre.h"
65 #include "gromacs/listed-forces/orires.h"
66 #include "gromacs/math/functions.h"
67 #include "gromacs/math/units.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vecdump.h"
70 #include "gromacs/mdlib/calcmu.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/force.h"
73 #include "gromacs/mdlib/forcerec.h"
74 #include "gromacs/mdlib/genborn.h"
75 #include "gromacs/mdlib/gmx_omp_nthreads.h"
76 #include "gromacs/mdlib/mdrun.h"
77 #include "gromacs/mdlib/nb_verlet.h"
78 #include "gromacs/mdlib/nbnxn_atomdata.h"
79 #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
80 #include "gromacs/mdlib/nbnxn_grid.h"
81 #include "gromacs/mdlib/nbnxn_search.h"
82 #include "gromacs/mdlib/qmmm.h"
83 #include "gromacs/mdlib/update.h"
84 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
85 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
86 #include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
87 #include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
88 #include "gromacs/mdtypes/commrec.h"
89 #include "gromacs/mdtypes/iforceprovider.h"
90 #include "gromacs/mdtypes/inputrec.h"
91 #include "gromacs/mdtypes/md_enums.h"
92 #include "gromacs/mdtypes/state.h"
93 #include "gromacs/pbcutil/ishift.h"
94 #include "gromacs/pbcutil/mshift.h"
95 #include "gromacs/pbcutil/pbc.h"
96 #include "gromacs/pulling/pull.h"
97 #include "gromacs/pulling/pull_rotation.h"
98 #include "gromacs/timing/cyclecounter.h"
99 #include "gromacs/timing/gpu_timing.h"
100 #include "gromacs/timing/wallcycle.h"
101 #include "gromacs/timing/wallcyclereporting.h"
102 #include "gromacs/timing/walltime_accounting.h"
103 #include "gromacs/utility/arrayref.h"
104 #include "gromacs/utility/basedefinitions.h"
105 #include "gromacs/utility/cstringutil.h"
106 #include "gromacs/utility/exceptions.h"
107 #include "gromacs/utility/fatalerror.h"
108 #include "gromacs/utility/gmxassert.h"
109 #include "gromacs/utility/gmxmpi.h"
110 #include "gromacs/utility/logger.h"
111 #include "gromacs/utility/pleasecite.h"
112 #include "gromacs/utility/smalloc.h"
113 #include "gromacs/utility/sysinfo.h"
115 #include "nbnxn_gpu.h"
117 void print_time(FILE *out,
118 gmx_walltime_accounting_t walltime_accounting,
119 gmx_int64_t step,
120 t_inputrec *ir,
121 t_commrec gmx_unused *cr)
123 time_t finish;
124 char timebuf[STRLEN];
125 double dt, elapsed_seconds, time_per_step;
126 char buf[48];
128 #if !GMX_THREAD_MPI
129 if (!PAR(cr))
130 #endif
132 fprintf(out, "\r");
134 fprintf(out, "step %s", gmx_step_str(step, buf));
135 fflush(out);
137 if ((step >= ir->nstlist))
139 double seconds_since_epoch = gmx_gettime();
140 elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
141 time_per_step = elapsed_seconds/(step - ir->init_step + 1);
142 dt = (ir->nsteps + ir->init_step - step) * time_per_step;
144 if (ir->nsteps >= 0)
146 if (dt >= 300)
148 finish = (time_t) (seconds_since_epoch + dt);
149 gmx_ctime_r(&finish, timebuf, STRLEN);
150 sprintf(buf, "%s", timebuf);
151 buf[strlen(buf)-1] = '\0';
152 fprintf(out, ", will finish %s", buf);
154 else
156 fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
159 else
161 fprintf(out, " performance: %.1f ns/day ",
162 ir->delta_t/1000*24*60*60/time_per_step);
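/* Note: delta_t is in ps, so delta_t/1000 is ns per MD step; multiplied by
 * 86400 s/day and divided by the measured wall-clock seconds per step this
 * gives the ns/day performance estimate printed above.
 */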
165 #if !GMX_THREAD_MPI
166 if (PAR(cr))
168 fprintf(out, "\n");
170 #endif
172 fflush(out);
175 void print_date_and_time(FILE *fplog, int nodeid, const char *title,
176 double the_time)
178 char time_string[STRLEN];
180 if (!fplog)
182 return;
186 int i;
187 char timebuf[STRLEN];
188 time_t temp_time = (time_t) the_time;
190 gmx_ctime_r(&temp_time, timebuf, STRLEN);
191 for (i = 0; timebuf[i] >= ' '; i++)
193 time_string[i] = timebuf[i];
195 time_string[i] = '\0';
198 fprintf(fplog, "%s on rank %d %s\n", title, nodeid, time_string);
201 void print_start(FILE *fplog, t_commrec *cr,
202 gmx_walltime_accounting_t walltime_accounting,
203 const char *name)
205 char buf[STRLEN];
207 sprintf(buf, "Started %s", name);
208 print_date_and_time(fplog, cr->nodeid, buf,
209 walltime_accounting_get_start_time_stamp(walltime_accounting));
212 static void sum_forces(rvec f[], const PaddedRVecVector *forceToAdd)
214 /* TODO: remove this - 1 when padding is properly implemented */
215 int end = forceToAdd->size() - 1;
216 const rvec *fAdd = as_rvec_array(forceToAdd->data());
218 // cppcheck-suppress unreadVariable
219 int gmx_unused nt = gmx_omp_nthreads_get(emntDefault);
220 #pragma omp parallel for num_threads(nt) schedule(static)
221 for (int i = 0; i < end; i++)
223 rvec_inc(f[i], fAdd[i]);
227 static void calc_virial(int start, int homenr, rvec x[], rvec f[],
228 tensor vir_part, t_graph *graph, matrix box,
229 t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
231 int i;
233 /* The short-range virial from surrounding boxes */
234 clear_mat(vir_part);
235 calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
236 inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
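/* Note: the virial is accumulated as Xi = -1/2 sum_i r_i (x) F_i, split into
 * a shift-vector contribution (calc_vir() above, using the per-shift force
 * sums in fr->fshift) and a single sum over the local atoms (f_calc_vir()
 * below).
 */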
238 /* Calculate partial virial, for local atoms only, based on short range.
239 * Total virial is computed in global_stat, called from do_md
241 f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
242 inc_nrnb(nrnb, eNR_VIRIAL, homenr);
244 /* Add position restraint contribution */
245 for (i = 0; i < DIM; i++)
247 vir_part[i][i] += fr->vir_diag_posres[i];
250 /* Add wall contribution */
251 for (i = 0; i < DIM; i++)
253 vir_part[i][ZZ] += fr->vir_wall_z[i];
256 if (debug)
258 pr_rvecs(debug, 0, "vir_part", vir_part, DIM);
262 static void pull_potential_wrapper(t_commrec *cr,
263 t_inputrec *ir,
264 matrix box, rvec x[],
265 rvec f[],
266 tensor vir_force,
267 t_mdatoms *mdatoms,
268 gmx_enerdata_t *enerd,
269 real *lambda,
270 double t,
271 gmx_wallcycle_t wcycle)
273 t_pbc pbc;
274 real dvdl;
276 /* Calculate the center of mass forces, this requires communication,
277 * which is why pull_potential is called close to other communication.
278 * The virial contribution is calculated directly,
279 * which is why we call pull_potential after calc_virial.
281 wallcycle_start(wcycle, ewcPULLPOT);
282 set_pbc(&pbc, ir->ePBC, box);
283 dvdl = 0;
284 enerd->term[F_COM_PULL] +=
285 pull_potential(ir->pull_work, mdatoms, &pbc,
286 cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
287 enerd->dvdl_lin[efptRESTRAINT] += dvdl;
288 wallcycle_stop(wcycle, ewcPULLPOT);
291 static void pme_receive_force_ener(t_commrec *cr,
292 gmx_wallcycle_t wcycle,
293 gmx_enerdata_t *enerd,
294 t_forcerec *fr)
296 real e_q, e_lj, dvdl_q, dvdl_lj;
297 float cycles_ppdpme, cycles_seppme;
299 cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
300 dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
302 /* In case of node-splitting, the PP nodes receive the long-range
303 * forces, virial and energy from the PME nodes here.
305 wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
306 dvdl_q = 0;
307 dvdl_lj = 0;
308 gmx_pme_receive_f(cr, as_rvec_array(fr->f_novirsum->data()), fr->vir_el_recip, &e_q,
309 fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
310 &cycles_seppme);
311 enerd->term[F_COUL_RECIP] += e_q;
312 enerd->term[F_LJ_RECIP] += e_lj;
313 enerd->dvdl_lin[efptCOUL] += dvdl_q;
314 enerd->dvdl_lin[efptVDW] += dvdl_lj;
316 if (wcycle)
318 dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
320 wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
323 static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr,
324 gmx_int64_t step, real forceTolerance,
325 const rvec *x, const rvec *f)
327 real force2Tolerance = gmx::square(forceTolerance);
328 std::uintmax_t numNonFinite = 0;
329 for (int i = 0; i < md->homenr; i++)
331 real force2 = norm2(f[i]);
332 bool nonFinite = !std::isfinite(force2);
333 if (force2 >= force2Tolerance || nonFinite)
335 fprintf(fp, "step %" GMX_PRId64 " atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
336 step,
337 ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], std::sqrt(force2));
339 if (nonFinite)
341 numNonFinite++;
344 if (numNonFinite > 0)
346 /* Note that with MPI this fatal call on one rank might interrupt
347 * the printing on other ranks. But we can only avoid that with
348 * an expensive MPI barrier that we would need at each step.
350 gmx_fatal(FARGS, "At step %" GMX_PRId64 " detected non-finite forces on %ju atoms", step, numNonFinite);
354 static void post_process_forces(t_commrec *cr,
355 gmx_int64_t step,
356 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
357 gmx_localtop_t *top,
358 matrix box, rvec x[],
359 rvec f[],
360 tensor vir_force,
361 t_mdatoms *mdatoms,
362 t_graph *graph,
363 t_forcerec *fr, gmx_vsite_t *vsite,
364 int flags)
366 if (fr->bF_NoVirSum)
368 if (vsite)
370 /* Spread the mesh force on virtual sites to the other particles...
371 * This is parallelized. MPI communication is performed
372 * if the constructing atoms aren't local.
374 wallcycle_start(wcycle, ewcVSITESPREAD);
375 spread_vsite_f(vsite, x, as_rvec_array(fr->f_novirsum->data()), nullptr,
376 (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip,
377 nrnb,
378 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
379 wallcycle_stop(wcycle, ewcVSITESPREAD);
381 if (flags & GMX_FORCE_VIRIAL)
383 /* Now add the forces, this is local */
384 sum_forces(f, fr->f_novirsum);
386 if (EEL_FULL(fr->eeltype))
388 /* Add the mesh contribution to the virial */
389 m_add(vir_force, fr->vir_el_recip, vir_force);
391 if (EVDW_PME(fr->vdwtype))
393 /* Add the mesh contribution to the virial */
394 m_add(vir_force, fr->vir_lj_recip, vir_force);
396 if (debug)
398 pr_rvecs(debug, 0, "vir_force", vir_force, DIM);
403 if (fr->print_force >= 0)
405 print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f);
409 static void do_nb_verlet(t_forcerec *fr,
410 interaction_const_t *ic,
411 gmx_enerdata_t *enerd,
412 int flags, int ilocality,
413 int clearF,
414 t_nrnb *nrnb,
415 gmx_wallcycle_t wcycle)
417 int enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
418 nonbonded_verlet_group_t *nbvg;
419 gmx_bool bUsingGpuKernels;
421 if (!(flags & GMX_FORCE_NONBONDED))
423 /* skip non-bonded calculation */
424 return;
427 nbvg = &fr->nbv->grp[ilocality];
429 /* GPU kernel launch overhead is already timed separately */
430 if (fr->cutoff_scheme != ecutsVERLET)
432 gmx_incons("Invalid cut-off scheme passed!");
435 bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
437 if (!bUsingGpuKernels)
439 wallcycle_sub_start(wcycle, ewcsNONBONDED);
441 switch (nbvg->kernel_type)
443 case nbnxnk4x4_PlainC:
444 nbnxn_kernel_ref(&nbvg->nbl_lists,
445 nbvg->nbat, ic,
446 fr->shift_vec,
447 flags,
448 clearF,
449 fr->fshift[0],
450 enerd->grpp.ener[egCOULSR],
451 fr->bBHAM ?
452 enerd->grpp.ener[egBHAMSR] :
453 enerd->grpp.ener[egLJSR]);
454 break;
456 case nbnxnk4xN_SIMD_4xN:
457 nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
458 nbvg->nbat, ic,
459 nbvg->ewald_excl,
460 fr->shift_vec,
461 flags,
462 clearF,
463 fr->fshift[0],
464 enerd->grpp.ener[egCOULSR],
465 fr->bBHAM ?
466 enerd->grpp.ener[egBHAMSR] :
467 enerd->grpp.ener[egLJSR]);
468 break;
469 case nbnxnk4xN_SIMD_2xNN:
470 nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
471 nbvg->nbat, ic,
472 nbvg->ewald_excl,
473 fr->shift_vec,
474 flags,
475 clearF,
476 fr->fshift[0],
477 enerd->grpp.ener[egCOULSR],
478 fr->bBHAM ?
479 enerd->grpp.ener[egBHAMSR] :
480 enerd->grpp.ener[egLJSR]);
481 break;
483 case nbnxnk8x8x8_GPU:
484 nbnxn_gpu_launch_kernel(fr->nbv->gpu_nbv, nbvg->nbat, flags, ilocality);
485 break;
487 case nbnxnk8x8x8_PlainC:
488 nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
489 nbvg->nbat, ic,
490 fr->shift_vec,
491 flags,
492 clearF,
493 nbvg->nbat->out[0].f,
494 fr->fshift[0],
495 enerd->grpp.ener[egCOULSR],
496 fr->bBHAM ?
497 enerd->grpp.ener[egBHAMSR] :
498 enerd->grpp.ener[egLJSR]);
499 break;
501 default:
502 gmx_incons("Invalid nonbonded kernel type passed!");
505 if (!bUsingGpuKernels)
507 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
510 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
512 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
514 else if ((!bUsingGpuKernels && nbvg->ewald_excl == ewaldexclAnalytical) ||
515 (bUsingGpuKernels && nbnxn_gpu_is_kernel_ewald_analytical(fr->nbv->gpu_nbv)))
517 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
519 else
521 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
523 enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
524 if (flags & GMX_FORCE_ENERGY)
526 /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
527 enr_nbnxn_kernel_ljc += 1;
528 enr_nbnxn_kernel_lj += 1;
531 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
532 nbvg->nbl_lists.natpair_ljq);
533 inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
534 nbvg->nbl_lists.natpair_lj);
535 /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
536 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
537 nbvg->nbl_lists.natpair_q);
539 if (ic->vdw_modifier == eintmodFORCESWITCH)
541 /* We add up the switch cost separately */
542 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
543 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
545 if (ic->vdw_modifier == eintmodPOTSWITCH)
547 /* We add up the switch cost separately */
548 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
549 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
551 if (ic->vdwtype == evdwPME)
553 /* We add up the LJ Ewald cost separately */
554 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
555 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
559 static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
560 t_forcerec *fr,
561 rvec x[],
562 rvec f[],
563 t_mdatoms *mdatoms,
564 t_lambda *fepvals,
565 real *lambda,
566 gmx_enerdata_t *enerd,
567 int flags,
568 t_nrnb *nrnb,
569 gmx_wallcycle_t wcycle)
571 int donb_flags;
572 nb_kernel_data_t kernel_data;
573 real lam_i[efptNR];
574 real dvdl_nb[efptNR];
575 int th;
576 int i, j;
578 donb_flags = 0;
579 /* Add short-range interactions */
580 donb_flags |= GMX_NONBONDED_DO_SR;
582 /* Currently all group scheme kernels always calculate (shift-)forces */
583 if (flags & GMX_FORCE_FORCES)
585 donb_flags |= GMX_NONBONDED_DO_FORCE;
587 if (flags & GMX_FORCE_VIRIAL)
589 donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
591 if (flags & GMX_FORCE_ENERGY)
593 donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
596 kernel_data.flags = donb_flags;
597 kernel_data.lambda = lambda;
598 kernel_data.dvdl = dvdl_nb;
600 kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
601 kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
603 /* reset free energy components */
604 for (i = 0; i < efptNR; i++)
606 dvdl_nb[i] = 0;
609 assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl);
611 wallcycle_sub_start(wcycle, ewcsNONBONDED);
612 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
613 for (th = 0; th < nbl_lists->nnbl; th++)
617 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
618 x, f, fr, mdatoms, &kernel_data, nrnb);
620 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
623 if (fepvals->sc_alpha != 0)
625 enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
626 enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
628 else
630 enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
631 enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
634 /* If we do foreign lambda and we have soft-core interactions
635 * we have to recalculate the (non-linear) energy contributions.
637 if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
639 kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
640 kernel_data.lambda = lam_i;
641 kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
642 kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
643 /* Note that we add to kernel_data.dvdl, but ignore the result */
645 for (i = 0; i < enerd->n_lambda; i++)
647 for (j = 0; j < efptNR; j++)
649 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
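/* lam_i now holds the full lambda vector to evaluate: i == 0 is the current
 * state, i > 0 are the foreign lambdas; the resulting potential energies are
 * accumulated into enerd->enerpart_lambda below, which feeds the
 * foreign-lambda energy output (e.g. for BAR).
 */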
651 reset_foreign_enerdata(enerd);
652 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
653 for (th = 0; th < nbl_lists->nnbl; th++)
657 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
658 x, f, fr, mdatoms, &kernel_data, nrnb);
660 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
663 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
664 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
668 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
671 gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
673 return nbv != nullptr && nbv->bUseGPU;
676 static gmx_inline void clear_rvecs_omp(int n, rvec v[])
678 int nth = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, n);
680 /* Note that we would like to avoid this conditional by putting it
681 * into the omp pragma instead, but then we still take the full
682 * omp parallel for overhead (at least with gcc5).
684 if (nth == 1)
686 for (int i = 0; i < n; i++)
688 clear_rvec(v[i]);
691 else
693 #pragma omp parallel for num_threads(nth) schedule(static)
694 for (int i = 0; i < n; i++)
696 clear_rvec(v[i]);
701 /*! \brief This routine checks if the potential energy is finite.
703 * Note that passing this check does not guarantee finite forces,
704 * since those use slightly different arithmetic. But in most cases
705 * there is just a narrow coordinate range where forces are not finite
706 * and energies are finite.
708 * \param[in] enerd The energy data; the non-bonded group energies need to be added in here before calling this routine
710 static void checkPotentialEnergyValidity(const gmx_enerdata_t *enerd)
712 if (!std::isfinite(enerd->term[F_EPOT]))
714 gmx_fatal(FARGS, "The total potential energy is %g, which is not finite. The LJ and electrostatic contributions to the energy are %g and %g, respectively. A non-finite potential energy can be caused by overlapping interactions in bonded interactions or very large or NaN coordinate values. Usually this is caused by a badly or non-equilibrated initial configuration or incorrect interactions or parameters in the topology.",
715 enerd->term[F_EPOT],
716 enerd->term[F_LJ],
717 enerd->term[F_COUL_SR]);
721 void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
722 t_inputrec *inputrec,
723 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
724 gmx_localtop_t *top,
725 gmx_groups_t gmx_unused *groups,
726 matrix box, rvec x[], history_t *hist,
727 PaddedRVecVector *force,
728 tensor vir_force,
729 t_mdatoms *mdatoms,
730 gmx_enerdata_t *enerd, t_fcdata *fcd,
731 real *lambda, t_graph *graph,
732 t_forcerec *fr, interaction_const_t *ic,
733 gmx_vsite_t *vsite, rvec mu_tot,
734 double t, gmx_edsam_t ed,
735 gmx_bool bBornRadii,
736 int flags)
738 int cg1, i, j;
739 double mu[2*DIM];
740 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
741 gmx_bool bDoForces, bUseGPU, bUseOrEmulGPU;
742 gmx_bool bDiffKernels = FALSE;
743 rvec vzero, box_diag;
744 float cycles_pme, cycles_force, cycles_wait_gpu;
745 /* TODO To avoid loss of precision, float can't be used for a
746 * cycle count. Build an object that can do this right and perhaps
747 * also be used by gmx_wallcycle_t */
748 gmx_cycles_t cycleCountBeforeLocalWorkCompletes = 0;
749 nonbonded_verlet_t *nbv;
751 cycles_force = 0;
752 cycles_wait_gpu = 0;
753 nbv = fr->nbv;
755 const int start = 0;
756 const int homenr = mdatoms->homenr;
758 clear_mat(vir_force);
760 if (DOMAINDECOMP(cr))
762 cg1 = cr->dd->ncg_tot;
764 else
766 cg1 = top->cgs.nr;
768 if (fr->n_tpi > 0)
770 cg1--;
773 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
774 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
775 bFillGrid = (bNS && bStateChanged);
776 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
777 bDoForces = (flags & GMX_FORCE_FORCES);
778 bUseGPU = fr->nbv->bUseGPU;
779 bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC);
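/* "Emulation" means running the GPU pair-list layout (8x8x8 clusters) through
 * the plain-C reference kernel on the CPU, so the GPU code path can be
 * exercised without an actual GPU.
 */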
781 if (bStateChanged)
783 update_forcerec(fr, box);
785 if (inputrecNeedMutot(inputrec))
787 /* Calculate total (local) dipole moment in a temporary common array.
788 * This makes it possible to sum them over nodes faster.
790 calc_mu(start, homenr,
791 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
792 mu, mu+DIM);
796 if (fr->ePBC != epbcNONE)
798 /* Compute shift vectors every step,
799 * because of pressure coupling or box deformation!
801 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
803 calc_shifts(box, fr->shift_vec);
806 if (bCalcCGCM)
808 put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
809 inc_nrnb(nrnb, eNR_SHIFTX, homenr);
811 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
813 unshift_self(graph, box, x);
817 nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
818 fr->shift_vec, nbv->grp[0].nbat);
820 #if GMX_MPI
821 if (!(cr->duty & DUTY_PME))
823 gmx_bool bBS;
824 matrix boxs;
826 /* Send particle coordinates to the pme nodes.
827 * Since this is only implemented for domain decomposition
828 * and domain decomposition does not use the graph,
829 * we do not need to worry about shifting.
832 wallcycle_start(wcycle, ewcPP_PMESENDX);
834 bBS = (inputrec->nwall == 2);
835 if (bBS)
837 copy_mat(box, boxs);
838 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
841 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
842 lambda[efptCOUL], lambda[efptVDW],
843 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
844 step);
846 wallcycle_stop(wcycle, ewcPP_PMESENDX);
848 #endif /* GMX_MPI */
850 /* do gridding for pair search */
851 if (bNS)
853 if (graph && bStateChanged)
855 /* Calculate intramolecular shift vectors to make molecules whole */
856 mk_mshift(fplog, graph, fr->ePBC, box, x);
859 clear_rvec(vzero);
860 box_diag[XX] = box[XX][XX];
861 box_diag[YY] = box[YY][YY];
862 box_diag[ZZ] = box[ZZ][ZZ];
864 wallcycle_start(wcycle, ewcNS);
865 if (!fr->bDomDec)
867 wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
868 nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
869 0, vzero, box_diag,
870 0, mdatoms->homenr, -1, fr->cginfo, x,
871 0, nullptr,
872 nbv->grp[eintLocal].kernel_type,
873 nbv->grp[eintLocal].nbat);
874 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
876 else
878 wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
879 nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
880 fr->cginfo, x,
881 nbv->grp[eintNonlocal].kernel_type,
882 nbv->grp[eintNonlocal].nbat);
883 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
886 if (nbv->ngrp == 1 ||
887 nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
889 nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll,
890 nbv->nbs, mdatoms, fr->cginfo);
892 else
894 nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal,
895 nbv->nbs, mdatoms, fr->cginfo);
896 nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll,
897 nbv->nbs, mdatoms, fr->cginfo);
899 wallcycle_stop(wcycle, ewcNS);
902 /* initialize the GPU atom data and copy shift vector */
903 if (bUseGPU)
905 if (bNS)
907 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
908 nbnxn_gpu_init_atomdata(nbv->gpu_nbv, nbv->grp[eintLocal].nbat);
909 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
912 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
913 nbnxn_gpu_upload_shiftvec(nbv->gpu_nbv, nbv->grp[eintLocal].nbat);
914 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
917 /* do local pair search */
918 if (bNS)
920 wallcycle_start_nocount(wcycle, ewcNS);
921 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
922 nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat,
923 &top->excls,
924 ic->rlist,
925 nbv->min_ci_balanced,
926 &nbv->grp[eintLocal].nbl_lists,
927 eintLocal,
928 nbv->grp[eintLocal].kernel_type,
929 nrnb);
930 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
932 if (bUseGPU)
934 /* initialize local pair-list on the GPU */
935 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
936 nbv->grp[eintLocal].nbl_lists.nbl[0],
937 eintLocal);
939 wallcycle_stop(wcycle, ewcNS);
941 else
943 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
944 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
945 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x,
946 nbv->grp[eintLocal].nbat);
947 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
948 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
951 if (bUseGPU)
953 wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
954 /* launch local nonbonded F on GPU */
955 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
956 nrnb, wcycle);
957 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
960 /* Communicate coordinates and sum dipole if necessary +
961 do non-local pair search */
962 if (DOMAINDECOMP(cr))
964 bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
965 nbv->grp[eintLocal].kernel_type);
967 if (bDiffKernels)
969 /* With GPU+CPU non-bonded calculations we need to copy
970 * the local coordinates to the non-local nbat struct
971 * (in CPU format) as the non-local kernel call also
972 * calculates the local - non-local interactions.
974 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
975 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
976 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x,
977 nbv->grp[eintNonlocal].nbat);
978 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
979 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
982 if (bNS)
984 wallcycle_start_nocount(wcycle, ewcNS);
985 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
987 if (bDiffKernels)
989 nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat);
992 nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat,
993 &top->excls,
994 ic->rlist,
995 nbv->min_ci_balanced,
996 &nbv->grp[eintNonlocal].nbl_lists,
997 eintNonlocal,
998 nbv->grp[eintNonlocal].kernel_type,
999 nrnb);
1001 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1003 if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_GPU)
1005 /* initialize non-local pair-list on the GPU */
1006 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
1007 nbv->grp[eintNonlocal].nbl_lists.nbl[0],
1008 eintNonlocal);
1010 wallcycle_stop(wcycle, ewcNS);
1012 else
1014 wallcycle_start(wcycle, ewcMOVEX);
1015 dd_move_x(cr->dd, box, x);
1016 wallcycle_stop(wcycle, ewcMOVEX);
1018 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1019 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1020 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x,
1021 nbv->grp[eintNonlocal].nbat);
1022 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1023 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1026 if (bUseGPU && !bDiffKernels)
1028 wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
1029 /* launch non-local nonbonded F on GPU */
1030 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
1031 nrnb, wcycle);
1032 cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1036 if (bUseGPU)
1038 /* launch D2H copy-back F */
1039 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1040 if (DOMAINDECOMP(cr) && !bDiffKernels)
1042 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->grp[eintNonlocal].nbat,
1043 flags, eatNonlocal);
1045 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->grp[eintLocal].nbat,
1046 flags, eatLocal);
1047 cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1050 if (bStateChanged && inputrecNeedMutot(inputrec))
1052 if (PAR(cr))
1054 gmx_sumd(2*DIM, mu, cr);
1057 for (i = 0; i < 2; i++)
1059 for (j = 0; j < DIM; j++)
1061 fr->mu_tot[i][j] = mu[i*DIM + j];
1065 if (fr->efep == efepNO)
1067 copy_rvec(fr->mu_tot[0], mu_tot);
1069 else
1071 for (j = 0; j < DIM; j++)
1073 mu_tot[j] =
1074 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
1075 lambda[efptCOUL]*fr->mu_tot[1][j];
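/* With free-energy perturbation the total dipole is interpolated linearly in
 * lambda between the A (lambda = 0) and B (lambda = 1) topology states.
 */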
1079 /* Reset energies */
1080 reset_enerdata(enerd);
1081 clear_rvecs(SHIFTS, fr->fshift);
1083 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1085 wallcycle_start(wcycle, ewcPPDURINGPME);
1086 dd_force_flop_start(cr->dd, nrnb);
1089 if (inputrec->bRot)
1091 /* Enforced rotation has its own cycle counter that starts after the collective
1092 * coordinates have been communicated. It is added to ddCyclF to allow
1093 * for proper load-balancing */
1094 wallcycle_start(wcycle, ewcROT);
1095 do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1096 wallcycle_stop(wcycle, ewcROT);
1099 /* Temporary solution until all routines take PaddedRVecVector */
1100 rvec *f = as_rvec_array(force->data());
1102 /* Start the force cycle counter.
1103 * This counter is stopped after do_force_lowlevel.
1104 * No parallel communication should occur while this counter is running,
1105 * since that will interfere with the dynamic load balancing.
1107 wallcycle_start(wcycle, ewcFORCE);
1108 if (bDoForces)
1110 /* Reset forces for which the virial is calculated separately:
1111 * PME/Ewald forces if necessary */
1112 if (fr->bF_NoVirSum)
1114 if (flags & GMX_FORCE_VIRIAL)
1116 fr->f_novirsum = fr->forceBufferNoVirialSummation;
1118 else
1120 /* We are not calculating the pressure so we do not need
1121 * a separate array for forces that do not contribute
1122 * to the pressure.
1124 fr->f_novirsum = force;
1128 if (fr->bF_NoVirSum)
1130 if (flags & GMX_FORCE_VIRIAL)
1132 /* TODO: remove this - 1 when padding is properly implemented */
1133 clear_rvecs_omp(fr->f_novirsum->size() - 1,
1134 as_rvec_array(fr->f_novirsum->data()));
1137 /* Clear the short- and long-range forces */
1138 clear_rvecs_omp(fr->natoms_force_constr, f);
1140 clear_rvec(fr->vir_diag_posres);
1143 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1145 clear_pull_forces(inputrec->pull_work);
1148 /* We calculate the non-bonded forces, when done on the CPU, here.
1149 * We do this before calling do_force_lowlevel, because in that
1150 * function, the listed forces are calculated before PME, which
1151 * does communication. With this order, non-bonded and listed
1152 * force calculation imbalance can be balanced out by the domain
1153 * decomposition load balancing.
1156 if (!bUseOrEmulGPU)
1158 /* Maybe we should move this into do_force_lowlevel */
1159 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
1160 nrnb, wcycle);
1163 if (fr->efep != efepNO)
1165 /* Calculate the local and non-local free energy interactions here.
1166 * Happens here on the CPU both with and without GPU.
1168 if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
1170 do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
1171 fr, x, f, mdatoms,
1172 inputrec->fepvals, lambda,
1173 enerd, flags, nrnb, wcycle);
1176 if (DOMAINDECOMP(cr) &&
1177 fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
1179 do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
1180 fr, x, f, mdatoms,
1181 inputrec->fepvals, lambda,
1182 enerd, flags, nrnb, wcycle);
1186 if (!bUseOrEmulGPU || bDiffKernels)
1188 int aloc;
1190 if (DOMAINDECOMP(cr))
1192 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
1193 bDiffKernels ? enbvClearFYes : enbvClearFNo,
1194 nrnb, wcycle);
1197 if (!bUseOrEmulGPU)
1199 aloc = eintLocal;
1201 else
1203 aloc = eintNonlocal;
1206 /* Add all the non-bonded force to the normal force array.
1207 * This can be split into a local and a non-local part when overlapping
1208 * communication with calculation with domain decomposition.
1210 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1211 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1212 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1213 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f);
1214 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1215 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1216 wallcycle_start_nocount(wcycle, ewcFORCE);
1218 /* if there are multiple fshift output buffers reduce them */
1219 if ((flags & GMX_FORCE_VIRIAL) &&
1220 nbv->grp[aloc].nbl_lists.nnbl > 1)
1222 /* This is not in a subcounter because it takes a
1223 negligible and constant-sized amount of time */
1224 nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
1225 fr->fshift);
1229 /* update QMMMrec, if necessary */
1230 if (fr->bQMMM)
1232 update_QMMMrec(cr, fr, x, mdatoms, box, top);
1235 /* Compute the bonded and non-bonded energies and optionally forces */
1236 do_force_lowlevel(fr, inputrec, &(top->idef),
1237 cr, nrnb, wcycle, mdatoms,
1238 x, hist, f, enerd, fcd, top, fr->born,
1239 bBornRadii, box,
1240 inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
1241 flags, &cycles_pme);
1243 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1245 if (ed)
1247 do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1250 if (bUseOrEmulGPU && !bDiffKernels)
1252 /* wait for non-local forces (or calculate in emulation mode) */
1253 if (DOMAINDECOMP(cr))
1255 if (bUseGPU)
1257 float cycles_tmp;
1259 wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
1260 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1261 flags, eatNonlocal,
1262 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1263 fr->fshift);
1264 cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
1265 cycles_wait_gpu += cycles_tmp;
1266 cycles_force += cycles_tmp;
1268 else
1270 wallcycle_start_nocount(wcycle, ewcFORCE);
1271 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
1272 nrnb, wcycle);
1273 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1275 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1276 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1277 /* skip the reduction if there was no non-local work to do */
1278 if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
1280 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
1281 nbv->grp[eintNonlocal].nbat, f);
1283 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1284 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1288 if (bDoForces && DOMAINDECOMP(cr))
1290 if (bUseGPU)
1292 /* We are done with the CPU compute, but the GPU local non-bonded
1293 * kernel can still be running while we communicate the forces.
1294 * We start a counter here, so we can, hopefully, time the rest
1295 * of the GPU kernel execution and data transfer.
1297 cycleCountBeforeLocalWorkCompletes = gmx_cycles_read();
1300 /* Communicate the forces */
1301 wallcycle_start(wcycle, ewcMOVEF);
1302 dd_move_f(cr->dd, f, fr->fshift);
1303 wallcycle_stop(wcycle, ewcMOVEF);
1306 if (bUseOrEmulGPU)
1308 /* wait for local forces (or calculate in emulation mode) */
1309 if (bUseGPU)
1311 float cycles_tmp, cycles_wait_est;
1312 /* Measured overhead on CUDA and OpenCL with(out) GPU sharing
1313 * is between 0.5 and 1.5 Mcycles. So 2 MCycles is an overestimate,
1314 * but even with a step of 0.1 ms the difference is less than 1%
1315 * of the step time.
1317 const float gpuWaitApiOverheadMargin = 2e6f; /* cycles */
1319 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
1320 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1321 flags, eatLocal,
1322 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1323 fr->fshift);
1324 cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
1326 if (bDoForces && DOMAINDECOMP(cr))
1328 cycles_wait_est = gmx_cycles_read() - cycleCountBeforeLocalWorkCompletes;
1330 if (cycles_tmp < gpuWaitApiOverheadMargin)
1332 /* We measured few cycles, it could be that the kernel
1333 * and transfer finished earlier and there was no actual
1334 * wait time, only API call overhead.
1335 * Then the actual time could be anywhere between 0 and
1336 * cycles_wait_est. As a compromise, we use half the time.
1338 cycles_wait_est *= 0.5f;
1341 else
1343 /* No force communication so we actually timed the wait */
1344 cycles_wait_est = cycles_tmp;
1346 /* Even though this is after dd_move_f, the actual task we are
1347 * waiting for runs asynchronously with dd_move_f and we usually
1348 * have nothing to balance it with, so we can and should add
1349 * the time to the force time for load balancing.
1351 cycles_force += cycles_wait_est;
1352 cycles_wait_gpu += cycles_wait_est;
1354 /* now clear the GPU outputs while we finish the step on the CPU */
1355 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1356 nbnxn_gpu_clear_outputs(nbv->gpu_nbv, flags);
1357 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1359 else
1361 wallcycle_start_nocount(wcycle, ewcFORCE);
1362 do_nb_verlet(fr, ic, enerd, flags, eintLocal,
1363 DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
1364 nrnb, wcycle);
1365 wallcycle_stop(wcycle, ewcFORCE);
1367 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1368 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1369 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
1370 nbv->grp[eintLocal].nbat, f);
1371 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1372 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1375 if (DOMAINDECOMP(cr))
1377 dd_force_flop_stop(cr->dd, nrnb);
1378 if (wcycle)
1380 dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1381 if (bUseGPU)
1383 dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU);
1388 if (bDoForces)
1390 /* Collect forces from modules */
1391 gmx::ArrayRef<gmx::RVec> fNoVirSum;
1392 if (fr->bF_NoVirSum)
1394 fNoVirSum = *fr->f_novirsum;
1396 fr->forceProviders->calculateForces(cr, mdatoms, box, t, x, *force, fNoVirSum);
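/* fr->forceProviders holds the registered IForceProvider modules (e.g. an
 * applied electric field). Both the normal force buffer and, when present,
 * the no-virial-summation buffer are passed in, so a provider can route
 * forces that must not enter the single-sum virial into the latter.
 */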
1398 /* If we have NoVirSum forces, but we do not calculate the virial,
1399 * we sum fr->f_novirsum=f later.
1401 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1403 wallcycle_start(wcycle, ewcVSITESPREAD);
1404 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, nullptr, nrnb,
1405 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1406 wallcycle_stop(wcycle, ewcVSITESPREAD);
1409 if (flags & GMX_FORCE_VIRIAL)
1411 /* Calculation of the virial must be done after vsites! */
1412 calc_virial(0, mdatoms->homenr, x, f,
1413 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1417 if (inputrec->bPull && pull_have_potential(inputrec->pull_work))
1419 /* Since the COM pulling is always done mass-weighted, no forces are
1420 * applied to vsites and this call can be done after vsite spreading.
1422 pull_potential_wrapper(cr, inputrec, box, x,
1423 f, vir_force, mdatoms, enerd, lambda, t,
1424 wcycle);
1427 /* Add the forces from enforced rotation potentials (if any) */
1428 if (inputrec->bRot)
1430 wallcycle_start(wcycle, ewcROTadd);
1431 enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
1432 wallcycle_stop(wcycle, ewcROTadd);
1435 /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
1436 IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
1438 if (PAR(cr) && !(cr->duty & DUTY_PME))
1440 /* In case of node-splitting, the PP nodes receive the long-range
1441 * forces, virial and energy from the PME nodes here.
1443 pme_receive_force_ener(cr, wcycle, enerd, fr);
1446 if (bDoForces)
1448 post_process_forces(cr, step, nrnb, wcycle,
1449 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1450 flags);
1453 if (flags & GMX_FORCE_ENERGY)
1455 /* Sum the potential energy terms from group contributions */
1456 sum_epot(&(enerd->grpp), enerd->term);
1458 if (!EI_TPI(inputrec->eI))
1460 checkPotentialEnergyValidity(enerd);
1465 void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
1466 t_inputrec *inputrec,
1467 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1468 gmx_localtop_t *top,
1469 gmx_groups_t *groups,
1470 matrix box, rvec x[], history_t *hist,
1471 PaddedRVecVector *force,
1472 tensor vir_force,
1473 t_mdatoms *mdatoms,
1474 gmx_enerdata_t *enerd, t_fcdata *fcd,
1475 real *lambda, t_graph *graph,
1476 t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
1477 double t, gmx_edsam_t ed,
1478 gmx_bool bBornRadii,
1479 int flags)
1481 int cg0, cg1, i, j;
1482 double mu[2*DIM];
1483 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
1484 gmx_bool bDoForces;
1485 float cycles_pme, cycles_force;
1487 const int start = 0;
1488 const int homenr = mdatoms->homenr;
1490 clear_mat(vir_force);
1492 cg0 = 0;
1493 if (DOMAINDECOMP(cr))
1495 cg1 = cr->dd->ncg_tot;
1497 else
1499 cg1 = top->cgs.nr;
1501 if (fr->n_tpi > 0)
1503 cg1--;
1506 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
1507 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
1508 /* Should we perform the long-range nonbonded evaluation inside the neighbour searching? */
1509 bFillGrid = (bNS && bStateChanged);
1510 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
1511 bDoForces = (flags & GMX_FORCE_FORCES);
1513 if (bStateChanged)
1515 update_forcerec(fr, box);
1517 if (inputrecNeedMutot(inputrec))
1519 /* Calculate total (local) dipole moment in a temporary common array.
1520 * This makes it possible to sum them over nodes faster.
1522 calc_mu(start, homenr,
1523 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
1524 mu, mu+DIM);
1528 if (fr->ePBC != epbcNONE)
1530 /* Compute shift vectors every step,
1531 * because of pressure coupling or box deformation!
1533 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
1535 calc_shifts(box, fr->shift_vec);
1538 if (bCalcCGCM)
1540 put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
1541 &(top->cgs), x, fr->cg_cm);
1542 inc_nrnb(nrnb, eNR_CGCM, homenr);
1543 inc_nrnb(nrnb, eNR_RESETX, cg1-cg0);
1545 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
1547 unshift_self(graph, box, x);
1550 else if (bCalcCGCM)
1552 calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
1553 inc_nrnb(nrnb, eNR_CGCM, homenr);
1556 if (bCalcCGCM && gmx_debug_at)
1558 pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
1561 #if GMX_MPI
1562 if (!(cr->duty & DUTY_PME))
1564 gmx_bool bBS;
1565 matrix boxs;
1567 /* Send particle coordinates to the pme nodes.
1568 * Since this is only implemented for domain decomposition
1569 * and domain decomposition does not use the graph,
1570 * we do not need to worry about shifting.
1573 wallcycle_start(wcycle, ewcPP_PMESENDX);
1575 bBS = (inputrec->nwall == 2);
1576 if (bBS)
1578 copy_mat(box, boxs);
1579 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
1582 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
1583 lambda[efptCOUL], lambda[efptVDW],
1584 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
1585 step);
1587 wallcycle_stop(wcycle, ewcPP_PMESENDX);
1589 #endif /* GMX_MPI */
1591 /* Communicate coordinates and sum dipole if necessary */
1592 if (DOMAINDECOMP(cr))
1594 wallcycle_start(wcycle, ewcMOVEX);
1595 dd_move_x(cr->dd, box, x);
1596 wallcycle_stop(wcycle, ewcMOVEX);
1599 if (inputrecNeedMutot(inputrec))
1601 if (bStateChanged)
1603 if (PAR(cr))
1605 gmx_sumd(2*DIM, mu, cr);
1607 for (i = 0; i < 2; i++)
1609 for (j = 0; j < DIM; j++)
1611 fr->mu_tot[i][j] = mu[i*DIM + j];
1615 if (fr->efep == efepNO)
1617 copy_rvec(fr->mu_tot[0], mu_tot);
1619 else
1621 for (j = 0; j < DIM; j++)
1623 mu_tot[j] =
1624 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
1629 /* Reset energies */
1630 reset_enerdata(enerd);
1631 clear_rvecs(SHIFTS, fr->fshift);
1633 if (bNS)
1635 wallcycle_start(wcycle, ewcNS);
1637 if (graph && bStateChanged)
1639 /* Calculate intramolecular shift vectors to make molecules whole */
1640 mk_mshift(fplog, graph, fr->ePBC, box, x);
1643 /* Do the actual neighbour searching */
1644 ns(fplog, fr, box,
1645 groups, top, mdatoms,
1646 cr, nrnb, bFillGrid);
1648 wallcycle_stop(wcycle, ewcNS);
1651 if (inputrec->implicit_solvent && bNS)
1653 make_gb_nblist(cr, inputrec->gb_algorithm,
1654 x, box, fr, &top->idef, graph, fr->born);
1657 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1659 wallcycle_start(wcycle, ewcPPDURINGPME);
1660 dd_force_flop_start(cr->dd, nrnb);
1663 if (inputrec->bRot)
1665 /* Enforced rotation has its own cycle counter that starts after the collective
1666 * coordinates have been communicated. It is added to ddCyclF to allow
1667 * for proper load-balancing */
1668 wallcycle_start(wcycle, ewcROT);
1669 do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1670 wallcycle_stop(wcycle, ewcROT);
1673 /* Temporary solution until all routines take PaddedRVecVector */
1674 rvec *f = as_rvec_array(force->data());
1676 /* Start the force cycle counter.
1677 * This counter is stopped after do_force_lowlevel.
1678 * No parallel communication should occur while this counter is running,
1679 * since that will interfere with the dynamic load balancing.
1681 wallcycle_start(wcycle, ewcFORCE);
1683 if (bDoForces)
1685 /* Reset forces for which the virial is calculated separately:
1686 * PME/Ewald forces if necessary */
1687 if (fr->bF_NoVirSum)
1689 if (flags & GMX_FORCE_VIRIAL)
1691 fr->f_novirsum = fr->forceBufferNoVirialSummation;
1692 /* TODO: remove this - 1 when padding is properly implemented */
1693 clear_rvecs(fr->f_novirsum->size() - 1,
1694 as_rvec_array(fr->f_novirsum->data()));
1696 else
1698 /* We are not calculating the pressure so we do not need
1699 * a separate array for forces that do not contribute
1700 * to the pressure.
1702 fr->f_novirsum = force;
1706 /* Clear the short- and long-range forces */
1707 clear_rvecs(fr->natoms_force_constr, f);
1709 clear_rvec(fr->vir_diag_posres);
1711 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1713 clear_pull_forces(inputrec->pull_work);
1716 /* update QMMMrec, if necessary */
1717 if (fr->bQMMM)
1719 update_QMMMrec(cr, fr, x, mdatoms, box, top);
1722 /* Compute the bonded and non-bonded energies and optionally forces */
1723 do_force_lowlevel(fr, inputrec, &(top->idef),
1724 cr, nrnb, wcycle, mdatoms,
1725 x, hist, f, enerd, fcd, top, fr->born,
1726 bBornRadii, box,
1727 inputrec->fepvals, lambda,
1728 graph, &(top->excls), fr->mu_tot,
1729 flags,
1730 &cycles_pme);
1732 cycles_force = wallcycle_stop(wcycle, ewcFORCE);
1734 if (ed)
1736 do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1739 if (DOMAINDECOMP(cr))
1741 dd_force_flop_stop(cr->dd, nrnb);
1742 if (wcycle)
1744 dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1748 if (bDoForces)
1750 /* Collect forces from modules */
1751 gmx::ArrayRef<gmx::RVec> fNoVirSum;
1752 if (fr->bF_NoVirSum)
1754 fNoVirSum = *fr->f_novirsum;
1756 fr->forceProviders->calculateForces(cr, mdatoms, box, t, x, *force, fNoVirSum);
1758 /* Communicate the forces */
1759 if (DOMAINDECOMP(cr))
1761 wallcycle_start(wcycle, ewcMOVEF);
1762 dd_move_f(cr->dd, f, fr->fshift);
1763 /* Do we need to communicate the separate force array
1764 * for terms that do not contribute to the single sum virial?
1765 * Position restraints and electric fields do not introduce
1766 * inter-cg forces, only full electrostatics methods do.
1767 * When we do not calculate the virial, fr->f_novirsum = f,
1768 * so we have already communicated these forces.
1770 if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
1771 (flags & GMX_FORCE_VIRIAL))
1773 dd_move_f(cr->dd, as_rvec_array(fr->f_novirsum->data()), nullptr);
1775 wallcycle_stop(wcycle, ewcMOVEF);
1778 /* If we have NoVirSum forces, but we do not calculate the virial,
1779 * we sum fr->f_novirsum=f later.
1781 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1783 wallcycle_start(wcycle, ewcVSITESPREAD);
1784 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, nullptr, nrnb,
1785 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1786 wallcycle_stop(wcycle, ewcVSITESPREAD);
1789 if (flags & GMX_FORCE_VIRIAL)
1791 /* Calculation of the virial must be done after vsites! */
1792 calc_virial(0, mdatoms->homenr, x, f,
1793 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1797 if (inputrec->bPull && pull_have_potential(inputrec->pull_work))
1799 pull_potential_wrapper(cr, inputrec, box, x,
1800 f, vir_force, mdatoms, enerd, lambda, t,
1801 wcycle);
1804 /* Add the forces from enforced rotation potentials (if any) */
1805 if (inputrec->bRot)
1807 wallcycle_start(wcycle, ewcROTadd);
1808 enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
1809 wallcycle_stop(wcycle, ewcROTadd);
1812 /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
1813 IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
1815 if (PAR(cr) && !(cr->duty & DUTY_PME))
1817 /* In case of node-splitting, the PP nodes receive the long-range
1818 * forces, virial and energy from the PME nodes here.
1820 pme_receive_force_ener(cr, wcycle, enerd, fr);
1823 if (bDoForces)
1825 post_process_forces(cr, step, nrnb, wcycle,
1826 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1827 flags);
1830 if (flags & GMX_FORCE_ENERGY)
1832 /* Sum the potential energy terms from group contributions */
1833 sum_epot(&(enerd->grpp), enerd->term);
1835 if (!EI_TPI(inputrec->eI))
1837 checkPotentialEnergyValidity(enerd);
1843 void do_force(FILE *fplog, t_commrec *cr,
1844 t_inputrec *inputrec,
1845 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1846 gmx_localtop_t *top,
1847 gmx_groups_t *groups,
1848 matrix box, PaddedRVecVector *coordinates, history_t *hist,
1849 PaddedRVecVector *force,
1850 tensor vir_force,
1851 t_mdatoms *mdatoms,
1852 gmx_enerdata_t *enerd, t_fcdata *fcd,
1853 gmx::ArrayRef<real> lambda, t_graph *graph,
1854 t_forcerec *fr,
1855 gmx_vsite_t *vsite, rvec mu_tot,
1856 double t, gmx_edsam_t ed,
1857 gmx_bool bBornRadii,
1858 int flags)
1860 /* modify force flag if not doing nonbonded */
1861 if (!fr->bNonbonded)
1863 flags &= ~GMX_FORCE_NONBONDED;
1866 GMX_ASSERT(coordinates->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
1867 fr->natoms_force == 0, "We might need 1 element extra for SIMD");
1868 GMX_ASSERT(force->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
1869 fr->natoms_force == 0, "We might need 1 element extra for SIMD");
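/* The extra element is needed because (unaligned) 4-wide SIMD loads of rvec
 * data may read one rvec past the last atom (see the matching comment in
 * do_constrain_first() below).
 */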
1871 rvec *x = as_rvec_array(coordinates->data());
1873 switch (inputrec->cutoff_scheme)
1875 case ecutsVERLET:
1876 do_force_cutsVERLET(fplog, cr, inputrec,
1877 step, nrnb, wcycle,
1878 top,
1879 groups,
1880 box, x, hist,
1881 force, vir_force,
1882 mdatoms,
1883 enerd, fcd,
1884 lambda.data(), graph,
1885 fr, fr->ic,
1886 vsite, mu_tot,
1887 t, ed,
1888 bBornRadii,
1889 flags);
1890 break;
1891 case ecutsGROUP:
1892 do_force_cutsGROUP(fplog, cr, inputrec,
1893 step, nrnb, wcycle,
1894 top,
1895 groups,
1896 box, x, hist,
1897 force, vir_force,
1898 mdatoms,
1899 enerd, fcd,
1900 lambda.data(), graph,
1901 fr, vsite, mu_tot,
1902 t, ed,
1903 bBornRadii,
1904 flags);
1905 break;
1906 default:
1907 gmx_incons("Invalid cut-off scheme passed!");
1912 void do_constrain_first(FILE *fplog, gmx_constr_t constr,
1913 t_inputrec *ir, t_mdatoms *md,
1914 t_state *state, t_commrec *cr, t_nrnb *nrnb,
1915 t_forcerec *fr, gmx_localtop_t *top)
1917 int i, m, start, end;
1918 gmx_int64_t step;
1919 real dt = ir->delta_t;
1920 real dvdl_dum;
1921 rvec *savex;
1923 /* We need to allocate one element extra, since we might use
1924 * (unaligned) 4-wide SIMD loads to access rvec entries.
1926 snew(savex, state->natoms + 1);
1928 start = 0;
1929 end = md->homenr;
1931 if (debug)
1933 fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
1934 start, md->homenr, end);
1936 /* Do a first constraining step to reset the particles... */
1937 step = ir->init_step;
1938 if (fplog)
1940 char buf[STEPSTRSIZE];
1941 fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
1942 gmx_step_str(step, buf));
1944 dvdl_dum = 0;
1946 /* constrain the current position */
1947 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
1948 ir, cr, step, 0, 1.0, md,
1949 as_rvec_array(state->x.data()), as_rvec_array(state->x.data()), nullptr,
1950 fr->bMolPBC, state->box,
1951 state->lambda[efptBONDED], &dvdl_dum,
1952 nullptr, nullptr, nrnb, econqCoord);
1953 if (EI_VV(ir->eI))
1955 /* constrain the initial velocity, and save it */
1956 /* also may be useful if we need the ekin from the halfstep for velocity verlet */
1957 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
1958 ir, cr, step, 0, 1.0, md,
1959 as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), as_rvec_array(state->v.data()),
1960 fr->bMolPBC, state->box,
1961 state->lambda[efptBONDED], &dvdl_dum,
1962 nullptr, nullptr, nrnb, econqVeloc);
1964 /* constrain the initial velocities at t=-dt/2 */
1965 if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
1967 for (i = start; (i < end); i++)
1969 for (m = 0; (m < DIM); m++)
1971 /* Reverse the velocity */
1972 state->v[i][m] = -state->v[i][m];
1973 /* Store the position at t=-dt in savex */
1974 savex[i][m] = state->x[i][m] + dt*state->v[i][m];
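/* Since v has just been negated, this stores savex = x(0) - dt*v(0), i.e. an
 * estimate of the positions one full step back in time; these are constrained
 * below with x(0) as the reference.
 */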
1977 /* Shake the positions at t=-dt with the positions at t=0
1978 * as reference coordinates.
1980 if (fplog)
1982 char buf[STEPSTRSIZE];
1983 fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
1984 gmx_step_str(step, buf));
1986 dvdl_dum = 0;
1987 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
1988 ir, cr, step, -1, 1.0, md,
1989 as_rvec_array(state->x.data()), savex, nullptr,
1990 fr->bMolPBC, state->box,
1991 state->lambda[efptBONDED], &dvdl_dum,
1992 as_rvec_array(state->v.data()), nullptr, nrnb, econqCoord);
1994 for (i = start; i < end; i++)
1996 for (m = 0; m < DIM; m++)
1998 /* Re-reverse the velocities */
1999 state->v[i][m] = -state->v[i][m];
2003 sfree(savex);
2007 static void
2008 integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
2009 double *enerout, double *virout)
2011 double enersum, virsum;
2012 double invscale, invscale2, invscale3;
2013 double r, ea, eb, ec, pa, pb, pc, pd;
2014 double y0, f, g, h;
2015 int ri, offset;
2016 double tabfactor;
2018 invscale = 1.0/scale;
2019 invscale2 = invscale*invscale;
2020 invscale3 = invscale*invscale2;
2022 /* Following summation derived from cubic spline definition,
2023 * Numerical Recipes in C, second edition, p. 113-116. Exact for
2024 * the cubic spline. We first calculate the negative of the
2025 * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
2026 * add the more standard, abrupt cutoff correction to that result,
2027 * yielding the long-range correction for a switched function. We
2028 * perform both the pressure and energy loops at the same time for
2029 * simplicity, as the computational cost is low. */
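/* Within each table bin the potential is a cubic polynomial in the offset
 * from the bin start, so the loop below accumulates the analytic bin-by-bin
 * values of integral V(r) r^2 dr (enersum) and integral r^3 dV/dr dr (virsum),
 * which are then scaled by 4*pi and the 6/12 table factor before being
 * returned (sign and density factors are applied by the caller).
 */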
2031 if (offstart == 0)
2033 /* Since the dispersion table has been scaled down a factor
2034 * 6.0 and the repulsion a factor 12.0 to compensate for the
2035 * c6/c12 parameters inside nbfp[] being scaled up (to save
2036 * flops in kernels), we need to correct for this.
2038 tabfactor = 6.0;
2040 else
2042 tabfactor = 12.0;
2045 enersum = 0.0;
2046 virsum = 0.0;
2047 for (ri = rstart; ri < rend; ++ri)
2049 r = ri*invscale;
2050 ea = invscale3;
2051 eb = 2.0*invscale2*r;
2052 ec = invscale*r*r;
2054 pa = invscale3;
2055 pb = 3.0*invscale2*r;
2056 pc = 3.0*invscale*r*r;
2057 pd = r*r*r;
2059 /* this "8" is from the packing in the vdwtab array - perhaps
2060 should be defined? */
2062 offset = 8*ri + offstart;
2063 y0 = vdwtab[offset];
2064 f = vdwtab[offset+1];
2065 g = vdwtab[offset+2];
2066 h = vdwtab[offset+3];
2068 enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
2069 virsum += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
2071 *enerout = 4.0*M_PI*enersum*tabfactor;
2072 *virout = 4.0*M_PI*virsum*tabfactor;
2075 void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr)
2077 double eners[2], virs[2], enersum, virsum;
2078 double r0, rc3, rc9;
2079 int ri0, ri1, i;
2080 real scale, *vdwtab;
2082 fr->enershiftsix = 0;
2083 fr->enershifttwelve = 0;
2084 fr->enerdiffsix = 0;
2085 fr->enerdifftwelve = 0;
2086 fr->virdiffsix = 0;
2087 fr->virdifftwelve = 0;
2089 if (eDispCorr != edispcNO)
2091 for (i = 0; i < 2; i++)
2093 eners[i] = 0;
2094 virs[i] = 0;
2096 if ((fr->vdw_modifier == eintmodPOTSHIFT) ||
2097 (fr->vdw_modifier == eintmodPOTSWITCH) ||
2098 (fr->vdw_modifier == eintmodFORCESWITCH) ||
2099 (fr->vdwtype == evdwSHIFT) ||
2100 (fr->vdwtype == evdwSWITCH))
2102 if (((fr->vdw_modifier == eintmodPOTSWITCH) ||
2103 (fr->vdw_modifier == eintmodFORCESWITCH) ||
2104 (fr->vdwtype == evdwSWITCH)) && fr->rvdw_switch == 0)
2106 gmx_fatal(FARGS,
2107 "With dispersion correction rvdw-switch can not be zero "
2108 "for vdw-type = %s", evdw_names[fr->vdwtype]);
2111 /* TODO This code depends on the logic in tables.c that
2112 constructs the table layout, which should be made
2113 explicit in future cleanup. */
2114 GMX_ASSERT(fr->dispersionCorrectionTable->interaction == GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
2115 "Dispersion-correction code needs a table with both repulsion and dispersion terms");
2116 scale = fr->dispersionCorrectionTable->scale;
2117 vdwtab = fr->dispersionCorrectionTable->data;
2119 /* Round the cut-offs to exact table values for precision */
2120 ri0 = static_cast<int>(floor(fr->rvdw_switch*scale));
2121 ri1 = static_cast<int>(ceil(fr->rvdw*scale));
2123 /* The code below has some support for handling force-switching, i.e.
2124 * when the force (instead of potential) is switched over a limited
2125 * region. This leads to a constant shift in the potential inside the
2126 * switching region, which we can handle by adding a constant energy
2127 * term in the force-switch case just like when we do potential-shift.
2129 * For now this is not enabled, but to keep the functionality in the
2130 * code we check separately for switch and shift. When we do force-switch
2131 * the shifting point is rvdw_switch, while it is the cutoff when we
2132 * have a classical potential-shift.
2134 * For a pure potential-shift the potential has a constant shift
2135 * all the way out to the cutoff, and that is it. For other forms
2136 * we need to calculate the constant shift up to the point where we
2137 * start modifying the potential.
2139 ri0 = (fr->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0;
2141 r0 = ri0/scale;
2142 rc3 = r0*r0*r0;
2143 rc9 = rc3*rc3*rc3;
2145 if ((fr->vdw_modifier == eintmodFORCESWITCH) ||
2146 (fr->vdwtype == evdwSHIFT))
2148 /* Determine the constant energy shift below rvdw_switch.
2149 * Table has a scale factor since we have scaled it down to compensate
2150 * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
2152 fr->enershiftsix = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
2153 fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
2155 else if (fr->vdw_modifier == eintmodPOTSHIFT)
2157 fr->enershiftsix = (real)(-1.0/(rc3*rc3));
2158 fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
2161 /* Add the constant part from 0 to rvdw_switch.
2162 * This integration from 0 to rvdw_switch overcounts the number
2163 * of interactions by 1, as it also counts the self interaction.
2164 * We will correct for this later.
2166 eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
2167 eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
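/* Here rc3 = r0^3, and the factors 6.0 and 12.0 above undo the scaling of the
 * dispersion and repulsion columns of the table, so enershiftsix and
 * enershifttwelve are the constant offsets between the plain 1/r^6 and 1/r^12
 * potentials and the modified potential at r0. Integrating such a constant
 * over the sphere of radius r0 gives the 4*M_PI*shift*rc3/3 terms just added.
 */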
2169 /* Calculate the contribution in the range [r0,r1] where we
2170 * modify the potential. For a pure potential-shift modifier we will
2171 * have ri0==ri1, and there will not be any contribution here.
2173 for (i = 0; i < 2; i++)
2175 enersum = 0;
2176 virsum = 0;
2177 integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
2178 eners[i] -= enersum;
2179 virs[i] -= virsum;
2182 /* Alright: Above we compensated by REMOVING the parts outside r0
2183 * corresponding to the ideal VdW 1/r6 and 1/r12 potentials.
2185 * Regardless of whether r0 is the point where we start switching,
2186 * or the cutoff where we calculated the constant shift, we include
2187 * all the parts we are missing out to infinity from r0 by
2188 * calculating the analytical dispersion correction.
2190 eners[0] += -4.0*M_PI/(3.0*rc3);
2191 eners[1] += 4.0*M_PI/(9.0*rc9);
2192 virs[0] += 8.0*M_PI/rc3;
2193 virs[1] += -16.0*M_PI/(3.0*rc9);
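/* These are the standard analytical tail integrals of the unmodified
 * potentials beyond r0, per unit c6/c12 (the actual coefficients and particle
 * densities are applied later in calc_dispcorr()):
 *   int_{r0..inf} 4*M_PI*r^2*(-1/r^6)  dr = -4*M_PI/(3*rc3)
 *   int_{r0..inf} 4*M_PI*r^2*(+1/r^12) dr =  4*M_PI/(9*rc9)
 * and for the virial int_{r0..inf} 4*M_PI*r^3*(dV/dr) dr evaluates to
 * 8*M_PI/rc3 for the dispersion term and -16*M_PI/(3*rc9) for the repulsion.
 */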
2195 else if (fr->vdwtype == evdwCUT ||
2196 EVDW_PME(fr->vdwtype) ||
2197 fr->vdwtype == evdwUSER)
2199 if (fr->vdwtype == evdwUSER && fplog)
2201 fprintf(fplog,
2202 "WARNING: using dispersion correction with user tables\n");
2205 /* Note that with LJ-PME, the dispersion correction is multiplied
2206 * by the difference between the actual C6 and the C6 that the
2207 * combination rule would produce.
2208 * This means the normal energy and virial difference formulas
2209 * can be used here.
2212 rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
2213 rc9 = rc3*rc3*rc3;
2214 /* Contribution beyond the cut-off */
2215 eners[0] += -4.0*M_PI/(3.0*rc3);
2216 eners[1] += 4.0*M_PI/(9.0*rc9);
2217 if (fr->vdw_modifier == eintmodPOTSHIFT)
2219 /* Contribution within the cut-off */
2220 eners[0] += -4.0*M_PI/(3.0*rc3);
2221 eners[1] += 4.0*M_PI/(3.0*rc9);
2223 /* Contribution beyond the cut-off */
2224 virs[0] += 8.0*M_PI/rc3;
2225 virs[1] += -16.0*M_PI/(3.0*rc9);
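/* With the potential-shift modifier the simulated potential differs from the
 * plain one by the constant -V(rvdw) inside the cut-off; correcting for this
 * adds V(rvdw) per interaction, which integrated over the cut-off sphere
 * gives the -4*M_PI/(3*rc3) and +4*M_PI/(3*rc9) terms above. A constant shift
 * exerts no force, so there is no corresponding virial term.
 */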
2227 else
2229 gmx_fatal(FARGS,
2230 "Dispersion correction is not implemented for vdw-type = %s",
2231 evdw_names[fr->vdwtype]);
2234 /* When we deprecate the group kernels the code below can go too */
2235 if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
2237 /* Calculate self-interaction coefficient (assuming that
2238 * the reciprocal-space contribution is constant in the
2239 * region that contributes to the self-interaction).
2241 fr->enershiftsix = gmx::power6(fr->ewaldcoeff_lj) / 6.0;
2243 eners[0] += -gmx::power3(std::sqrt(M_PI)*fr->ewaldcoeff_lj)/3.0;
2244 virs[0] += gmx::power3(std::sqrt(M_PI)*fr->ewaldcoeff_lj);
2247 fr->enerdiffsix = eners[0];
2248 fr->enerdifftwelve = eners[1];
2249 /* The 0.5 is due to the Gromacs definition of the virial */
2250 fr->virdiffsix = 0.5*virs[0];
2251 fr->virdifftwelve = 0.5*virs[1];
2255 void calc_dispcorr(t_inputrec *ir, t_forcerec *fr,
2256 matrix box, real lambda, tensor pres, tensor virial,
2257 real *prescorr, real *enercorr, real *dvdlcorr)
2259 gmx_bool bCorrAll, bCorrPres;
2260 real dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
2261 int m;
2263 *prescorr = 0;
2264 *enercorr = 0;
2265 *dvdlcorr = 0;
2267 clear_mat(virial);
2268 clear_mat(pres);
2270 if (ir->eDispCorr != edispcNO)
2272 bCorrAll = (ir->eDispCorr == edispcAllEner ||
2273 ir->eDispCorr == edispcAllEnerPres);
2274 bCorrPres = (ir->eDispCorr == edispcEnerPres ||
2275 ir->eDispCorr == edispcAllEnerPres);
2277 invvol = 1/det(box);
2278 if (fr->n_tpi)
2280 /* Only correct for the interactions with the inserted molecule */
2281 dens = (fr->numAtomsForDispersionCorrection - fr->n_tpi)*invvol;
2282 ninter = fr->n_tpi;
2284 else
2286 dens = fr->numAtomsForDispersionCorrection*invvol;
2287 ninter = 0.5*fr->numAtomsForDispersionCorrection;
2290 if (ir->efep == efepNO)
2292 avcsix = fr->avcsix[0];
2293 avctwelve = fr->avctwelve[0];
2295 else
2297 avcsix = (1 - lambda)*fr->avcsix[0] + lambda*fr->avcsix[1];
2298 avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
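/* With free energy on, the average C6 and C12 are interpolated linearly
 * between the A (lambda = 0) and B (lambda = 1) states.
 */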
2301 enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
2302 *enercorr += avcsix*enerdiff;
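/* In the common case (plain cut-off, no shift terms, no TPI) this reduces to
 * the textbook long-range dispersion correction
 *   Ecorr = 0.5*N * (N/V) * <C6> * (-4*M_PI/(3*rvdw^3))
 *         = -2*M_PI*N^2*<C6>/(3*V*rvdw^3)
 * with N the number of atoms included in the correction.
 */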
2303 dvdlambda = 0.0;
2304 if (ir->efep != efepNO)
2306 dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
2308 if (bCorrAll)
2310 enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
2311 *enercorr += avctwelve*enerdiff;
2312 if (fr->efep != efepNO)
2314 dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
2318 if (bCorrPres)
2320 svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
2321 if (ir->eDispCorr == edispcAllEnerPres)
2323 svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
2325 /* The factor 2 is because of the Gromacs virial definition */
2326 spres = -2.0*invvol*svir*PRESFAC;
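/* Per the GROMACS virial/pressure convention, P = (2/V)*(Ekin - Xi), so a
 * correction svir to each diagonal element of the virial changes the
 * corresponding pressure element by -2*svir/V, converted to bar with PRESFAC.
 */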
2328 for (m = 0; m < DIM; m++)
2330 virial[m][m] += svir;
2331 pres[m][m] += spres;
2333 *prescorr += spres;
2336 /* Can't currently control when it prints; for now, just print when debugging */
2337 if (debug)
2339 if (bCorrAll)
2341 fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
2342 avcsix, avctwelve);
2344 if (bCorrPres)
2346 fprintf(debug,
2347 "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
2348 *enercorr, spres, svir);
2350 else
2352 fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
2356 if (fr->efep != efepNO)
2358 *dvdlcorr += dvdlambda;
2363 static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2364 const gmx_mtop_t *mtop, rvec x[],
2365 gmx_bool bFirst)
2367 t_graph *graph;
2368 int mb, as, mol;
2369 gmx_molblock_t *molb;
2371 if (bFirst && fplog)
2373 fprintf(fplog, "Removing pbc first time\n");
2376 snew(graph, 1);
2377 as = 0;
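/* as is the running index in x[] of the first atom of the current molecule block */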
2378 for (mb = 0; mb < mtop->nmolblock; mb++)
2380 molb = &mtop->molblock[mb];
2381 if (molb->natoms_mol == 1 ||
2382 (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
2384 /* Just one atom or charge group in the molecule, no PBC required */
2385 as += molb->nmol*molb->natoms_mol;
2387 else
2389 /* Pass nullptr instead of fplog to avoid graph prints for each molecule type */
2390 mk_graph_ilist(nullptr, mtop->moltype[molb->type].ilist,
2391 0, molb->natoms_mol, FALSE, FALSE, graph);
2393 for (mol = 0; mol < molb->nmol; mol++)
2395 mk_mshift(fplog, graph, ePBC, box, x+as);
2397 shift_self(graph, box, x+as);
2398 /* The molecule is whole now.
2399 * We don't need the second mk_mshift call as in do_pbc_first,
2400 * since we no longer need this graph.
2403 as += molb->natoms_mol;
2405 done_graph(graph);
2408 sfree(graph);
2411 void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
2412 const gmx_mtop_t *mtop, rvec x[])
2414 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
2417 void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2418 gmx_mtop_t *mtop, rvec x[])
2420 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
2423 void put_atoms_in_box_omp(int ePBC, const matrix box, int natoms, rvec x[])
2425 int t, nth;
2426 nth = gmx_omp_nthreads_get(emntDefault);
2428 #pragma omp parallel for num_threads(nth) schedule(static)
2429 for (t = 0; t < nth; t++)
2433 int offset, len;
2435 offset = (natoms*t )/nth;
2436 len = (natoms*(t + 1))/nth - offset;
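/* This integer arithmetic splits [0,natoms) into nth contiguous chunks that
 * differ in size by at most one atom and cover the range exactly, e.g.
 * natoms = 10, nth = 3 gives chunks of 3, 3 and 4 atoms.
 */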
2437 put_atoms_in_box(ePBC, box, len, x + offset);
2439 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
2443 // TODO This can be cleaned up a lot, and move back to runner.cpp
2444 void finish_run(FILE *fplog, const gmx::MDLogger &mdlog, t_commrec *cr,
2445 t_inputrec *inputrec,
2446 t_nrnb nrnb[], gmx_wallcycle_t wcycle,
2447 gmx_walltime_accounting_t walltime_accounting,
2448 nonbonded_verlet_t *nbv,
2449 gmx_bool bWriteStat)
2451 t_nrnb *nrnb_tot = nullptr;
2452 double delta_t = 0;
2453 double nbfs = 0, mflop = 0;
2454 double elapsed_time,
2455 elapsed_time_over_all_ranks,
2456 elapsed_time_over_all_threads,
2457 elapsed_time_over_all_threads_over_all_ranks;
2458 /* Control whether it is valid to print a report. Only the
2459 simulation master may print, but it should not do so if the run
2460 terminated e.g. before a scheduled reset step. This is
2461 complicated by the fact that PME ranks are unaware of the
2462 reason why they were sent a pmerecvqxFINISH. To avoid
2463 communication deadlocks, we always do the communication for the
2464 report, even if we've decided not to write the report, because
2465 how long it takes to finish the run is not important when we've
2466 decided not to report on the simulation performance. */
2467 bool printReport = SIMMASTER(cr);
2469 if (!walltime_accounting_get_valid_finish(walltime_accounting))
2471 GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
2472 printReport = false;
2475 if (cr->nnodes > 1)
2477 snew(nrnb_tot, 1);
2478 #if GMX_MPI
2479 MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
2480 cr->mpi_comm_mysim);
2481 #endif
2483 else
2485 nrnb_tot = nrnb;
2488 elapsed_time = walltime_accounting_get_elapsed_time(walltime_accounting);
2489 elapsed_time_over_all_ranks = elapsed_time;
2490 elapsed_time_over_all_threads = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
2491 elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
2492 #if GMX_MPI
2493 if (cr->nnodes > 1)
2495 /* reduce elapsed_time over all MPI ranks in the current simulation */
2496 MPI_Allreduce(&elapsed_time,
2497 &elapsed_time_over_all_ranks,
2498 1, MPI_DOUBLE, MPI_SUM,
2499 cr->mpi_comm_mysim);
2500 elapsed_time_over_all_ranks /= cr->nnodes;
2501 /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
2502 * current simulation. */
2503 MPI_Allreduce(&elapsed_time_over_all_threads,
2504 &elapsed_time_over_all_threads_over_all_ranks,
2505 1, MPI_DOUBLE, MPI_SUM,
2506 cr->mpi_comm_mysim);
2508 #endif
2510 if (printReport)
2512 print_flop(fplog, nrnb_tot, &nbfs, &mflop);
2514 if (cr->nnodes > 1)
2516 sfree(nrnb_tot);
2519 if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
2521 print_dd_statistics(cr, inputrec, fplog);
2524 /* TODO Move the responsibility for any scaling by thread counts
2525 * to the code that handled the thread region, so that there's a
2526 * mechanism to keep cycle counting working during the transition
2527 * to task parallelism. */
2528 int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
2529 int nthreads_pme = gmx_omp_nthreads_get(emntPME);
2530 wallcycle_scale_by_num_threads(wcycle, cr->duty == DUTY_PME, nthreads_pp, nthreads_pme);
2531 auto cycle_sum(wallcycle_sum(cr, wcycle));
2533 if (printReport)
2535 struct gmx_wallclock_gpu_t* gputimes = use_GPU(nbv) ? nbnxn_gpu_get_timings(nbv->gpu_nbv) : nullptr;
2537 wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
2538 elapsed_time_over_all_ranks,
2539 wcycle, cycle_sum, gputimes);
2541 if (EI_DYNAMICS(inputrec->eI))
2543 delta_t = inputrec->delta_t;
2546 if (fplog)
2548 print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
2549 elapsed_time_over_all_ranks,
2550 walltime_accounting_get_nsteps_done(walltime_accounting),
2551 delta_t, nbfs, mflop);
2553 if (bWriteStat)
2555 print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
2556 elapsed_time_over_all_ranks,
2557 walltime_accounting_get_nsteps_done(walltime_accounting),
2558 delta_t, nbfs, mflop);
2563 extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, gmx::ArrayRef<real> lambda, double *lam0)
2565 /* this function works, but could probably use a logic rewrite to keep all the different
2566 types of efep straight. */
2568 if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
2570 return;
2573 t_lambda *fep = ir->fepvals;
2574 *fep_state = fep->init_fep_state; /* this might overwrite the value read from
2575 a checkpoint if one is set -- a kludge is in place
2576 for now to prevent this. */
2578 for (int i = 0; i < efptNR; i++)
2580 /* overwrite lambda state with init_lambda for now for backwards compatibility */
2581 if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
2583 lambda[i] = fep->init_lambda;
2584 if (lam0)
2586 lam0[i] = lambda[i];
2589 else
2591 lambda[i] = fep->all_lambda[i][*fep_state];
2592 if (lam0)
2594 lam0[i] = lambda[i];
2598 if (ir->bSimTemp)
2600 /* need to rescale control temperatures to match current state */
2601 for (int i = 0; i < ir->opts.ngtc; i++)
2603 if (ir->opts.ref_t[i] > 0)
2605 ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
2610 /* Write the current lambda values to the log */
2611 if (fplog != nullptr)
2613 fprintf(fplog, "Initial vector of lambda components:[ ");
2614 for (const auto &l : lambda)
2616 fprintf(fplog, "%10.4f ", l);
2618 fprintf(fplog, "]\n");
2620 return;
2624 void init_md(FILE *fplog,
2625 t_commrec *cr, gmx::IMDOutputProvider *outputProvider,
2626 t_inputrec *ir, const gmx_output_env_t *oenv,
2627 double *t, double *t0,
2628 gmx::ArrayRef<real> lambda, int *fep_state, double *lam0,
2629 t_nrnb *nrnb, gmx_mtop_t *mtop,
2630 gmx_update_t **upd,
2631 int nfile, const t_filenm fnm[],
2632 gmx_mdoutf_t *outf, t_mdebin **mdebin,
2633 tensor force_vir, tensor shake_vir, rvec mu_tot,
2634 gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags,
2635 gmx_wallcycle_t wcycle)
2637 int i;
2639 /* Initial values */
2640 *t = *t0 = ir->init_t;
2642 *bSimAnn = FALSE;
2643 for (i = 0; i < ir->opts.ngtc; i++)
2645 /* set bSimAnn if any group is being annealed */
2646 if (ir->opts.annealing[i] != eannNO)
2648 *bSimAnn = TRUE;
2652 /* Initialize lambda variables */
2653 initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
2655 // TODO upd is never NULL in practice, but the analysers don't know that
2656 if (upd)
2658 *upd = init_update(ir);
2660 if (*bSimAnn)
2662 update_annealing_target_temp(ir, ir->init_t, upd ? *upd : nullptr);
2665 if (vcm != nullptr)
2667 *vcm = init_vcm(fplog, &mtop->groups, ir);
2670 if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
2672 if (ir->etc == etcBERENDSEN)
2674 please_cite(fplog, "Berendsen84a");
2676 if (ir->etc == etcVRESCALE)
2678 please_cite(fplog, "Bussi2007a");
2680 if (ir->eI == eiSD1)
2682 please_cite(fplog, "Goga2012");
2685 init_nrnb(nrnb);
2687 if (nfile != -1)
2689 *outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, outputProvider, ir, mtop, oenv, wcycle);
2691 *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? nullptr : mdoutf_get_fp_ene(*outf),
2692 mtop, ir, mdoutf_get_fp_dhdl(*outf));
2695 /* Initialize variables */
2696 clear_mat(force_vir);
2697 clear_mat(shake_vir);
2698 clear_rvec(mu_tot);