Continue removing -nb gpu_cpu
[gromacs.git] / src / gromacs / mdlib / sim_util.cpp
blob 815b1d964d81dcfb0b8ccae37ee46ac068f5cb05
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "sim_util.h"
41 #include "config.h"
43 #include <assert.h>
44 #include <math.h>
45 #include <stdio.h>
46 #include <string.h>
48 #include <cstdint>
50 #include <array>
52 #include "gromacs/domdec/dlbtiming.h"
53 #include "gromacs/domdec/domdec.h"
54 #include "gromacs/domdec/domdec_struct.h"
55 #include "gromacs/essentialdynamics/edsam.h"
56 #include "gromacs/ewald/pme.h"
57 #include "gromacs/gmxlib/chargegroup.h"
58 #include "gromacs/gmxlib/network.h"
59 #include "gromacs/gmxlib/nrnb.h"
60 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
61 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
62 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
63 #include "gromacs/imd/imd.h"
64 #include "gromacs/listed-forces/bonded.h"
65 #include "gromacs/listed-forces/disre.h"
66 #include "gromacs/listed-forces/orires.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/units.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/math/vecdump.h"
71 #include "gromacs/mdlib/calcmu.h"
72 #include "gromacs/mdlib/constr.h"
73 #include "gromacs/mdlib/force.h"
74 #include "gromacs/mdlib/forcerec.h"
75 #include "gromacs/mdlib/genborn.h"
76 #include "gromacs/mdlib/gmx_omp_nthreads.h"
77 #include "gromacs/mdlib/mdrun.h"
78 #include "gromacs/mdlib/nb_verlet.h"
79 #include "gromacs/mdlib/nbnxn_atomdata.h"
80 #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
81 #include "gromacs/mdlib/nbnxn_grid.h"
82 #include "gromacs/mdlib/nbnxn_search.h"
83 #include "gromacs/mdlib/qmmm.h"
84 #include "gromacs/mdlib/update.h"
85 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
86 #include "gromacs/mdtypes/commrec.h"
87 #include "gromacs/mdtypes/iforceprovider.h"
88 #include "gromacs/mdtypes/inputrec.h"
89 #include "gromacs/mdtypes/md_enums.h"
90 #include "gromacs/mdtypes/state.h"
91 #include "gromacs/pbcutil/ishift.h"
92 #include "gromacs/pbcutil/mshift.h"
93 #include "gromacs/pbcutil/pbc.h"
94 #include "gromacs/pulling/pull.h"
95 #include "gromacs/pulling/pull_rotation.h"
96 #include "gromacs/timing/cyclecounter.h"
97 #include "gromacs/timing/gpu_timing.h"
98 #include "gromacs/timing/wallcycle.h"
99 #include "gromacs/timing/wallcyclereporting.h"
100 #include "gromacs/timing/walltime_accounting.h"
101 #include "gromacs/utility/arrayref.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/cstringutil.h"
104 #include "gromacs/utility/exceptions.h"
105 #include "gromacs/utility/fatalerror.h"
106 #include "gromacs/utility/gmxassert.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/logger.h"
109 #include "gromacs/utility/pleasecite.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/sysinfo.h"
113 #include "nbnxn_gpu.h"
114 #include "nbnxn_kernels/nbnxn_kernel_cpu.h"
115 #include "nbnxn_kernels/nbnxn_kernel_prune.h"
117 void print_time(FILE *out,
118 gmx_walltime_accounting_t walltime_accounting,
119 gmx_int64_t step,
120 t_inputrec *ir,
121 t_commrec gmx_unused *cr)
123 time_t finish;
124 char timebuf[STRLEN];
125 double dt, elapsed_seconds, time_per_step;
126 char buf[48];
128 #if !GMX_THREAD_MPI
129 if (!PAR(cr))
130 #endif
132 fprintf(out, "\r");
134 fprintf(out, "step %s", gmx_step_str(step, buf));
135 fflush(out);
137 if ((step >= ir->nstlist))
139 double seconds_since_epoch = gmx_gettime();
140 elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
141 time_per_step = elapsed_seconds/(step - ir->init_step + 1);
142 dt = (ir->nsteps + ir->init_step - step) * time_per_step;
144 if (ir->nsteps >= 0)
146 if (dt >= 300)
148 finish = (time_t) (seconds_since_epoch + dt);
149 gmx_ctime_r(&finish, timebuf, STRLEN);
150 sprintf(buf, "%s", timebuf);
151 buf[strlen(buf)-1] = '\0';
152 fprintf(out, ", will finish %s", buf);
154 else
156 fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
159 else
161 fprintf(out, " performance: %.1f ns/day ",
162 ir->delta_t/1000*24*60*60/time_per_step);
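/* Note on the estimate printed above: ir->delta_t is in ps, so delta_t/1000 is the
 * number of ns simulated per MD step; dividing by the wall-clock seconds per step
 * and scaling by 24*60*60 (seconds per day) gives the reported ns/day.
 */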
165 #if !GMX_THREAD_MPI
166 if (PAR(cr))
168 fprintf(out, "\n");
170 #endif
172 fflush(out);
175 void print_date_and_time(FILE *fplog, int nodeid, const char *title,
176 double the_time)
178 char time_string[STRLEN];
180 if (!fplog)
182 return;
186 int i;
187 char timebuf[STRLEN];
188 time_t temp_time = (time_t) the_time;
190 gmx_ctime_r(&temp_time, timebuf, STRLEN);
191 for (i = 0; timebuf[i] >= ' '; i++)
193 time_string[i] = timebuf[i];
195 time_string[i] = '\0';
198 fprintf(fplog, "%s on rank %d %s\n", title, nodeid, time_string);
201 void print_start(FILE *fplog, t_commrec *cr,
202 gmx_walltime_accounting_t walltime_accounting,
203 const char *name)
205 char buf[STRLEN];
207 sprintf(buf, "Started %s", name);
208 print_date_and_time(fplog, cr->nodeid, buf,
209 walltime_accounting_get_start_time_stamp(walltime_accounting));
212 static void sum_forces(rvec f[], const PaddedRVecVector *forceToAdd)
214 /* TODO: remove this - 1 when padding is properly implemented */
215 int end = forceToAdd->size() - 1;
216 const rvec *fAdd = as_rvec_array(forceToAdd->data());
218 // cppcheck-suppress unreadVariable
219 int gmx_unused nt = gmx_omp_nthreads_get(emntDefault);
220 #pragma omp parallel for num_threads(nt) schedule(static)
221 for (int i = 0; i < end; i++)
223 rvec_inc(f[i], fAdd[i]);
227 static void calc_virial(int start, int homenr, rvec x[], rvec f[],
228 tensor vir_part, t_graph *graph, matrix box,
229 t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
231 int i;
233 /* The short-range virial from surrounding boxes */
234 calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
235 inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
237 /* Calculate partial virial, for local atoms only, based on short range.
238 * Total virial is computed in global_stat, called from do_md
240 f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
241 inc_nrnb(nrnb, eNR_VIRIAL, homenr);
243 /* Add position restraint contribution */
244 for (i = 0; i < DIM; i++)
246 vir_part[i][i] += fr->vir_diag_posres[i];
249 /* Add wall contribution */
250 for (i = 0; i < DIM; i++)
252 vir_part[i][ZZ] += fr->vir_wall_z[i];
255 if (debug)
257 pr_rvecs(debug, 0, "vir_part", vir_part, DIM);
261 static void pull_potential_wrapper(t_commrec *cr,
262 t_inputrec *ir,
263 matrix box, rvec x[],
264 rvec f[],
265 tensor vir_force,
266 t_mdatoms *mdatoms,
267 gmx_enerdata_t *enerd,
268 real *lambda,
269 double t,
270 gmx_wallcycle_t wcycle)
272 t_pbc pbc;
273 real dvdl;
275 /* Calculate the center of mass forces; this requires communication,
276 * which is why pull_potential is called close to other communication.
277 * The virial contribution is calculated directly,
278 * which is why we call pull_potential after calc_virial.
280 wallcycle_start(wcycle, ewcPULLPOT);
281 set_pbc(&pbc, ir->ePBC, box);
282 dvdl = 0;
283 enerd->term[F_COM_PULL] +=
284 pull_potential(ir->pull_work, mdatoms, &pbc,
285 cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
286 enerd->dvdl_lin[efptRESTRAINT] += dvdl;
287 wallcycle_stop(wcycle, ewcPULLPOT);
290 static void pme_receive_force_ener(t_commrec *cr,
291 gmx_wallcycle_t wcycle,
292 gmx_enerdata_t *enerd,
293 t_forcerec *fr)
295 real e_q, e_lj, dvdl_q, dvdl_lj;
296 float cycles_ppdpme, cycles_seppme;
298 cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
299 dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
301 /* In case of node-splitting, the PP nodes receive the long-range
302 * forces, virial and energy from the PME nodes here.
304 wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
305 dvdl_q = 0;
306 dvdl_lj = 0;
307 gmx_pme_receive_f(cr, as_rvec_array(fr->f_novirsum->data()), fr->vir_el_recip, &e_q,
308 fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
309 &cycles_seppme);
310 enerd->term[F_COUL_RECIP] += e_q;
311 enerd->term[F_LJ_RECIP] += e_lj;
312 enerd->dvdl_lin[efptCOUL] += dvdl_q;
313 enerd->dvdl_lin[efptVDW] += dvdl_lj;
315 if (wcycle)
317 dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
319 wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
322 static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr,
323 gmx_int64_t step, real forceTolerance,
324 const rvec *x, const rvec *f)
326 real force2Tolerance = gmx::square(forceTolerance);
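/* Comparing squared norms against the squared tolerance avoids a sqrt per atom;
 * the sqrt is only taken below for the few atoms that are actually printed.
 */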
327 std::uintmax_t numNonFinite = 0;
328 for (int i = 0; i < md->homenr; i++)
330 real force2 = norm2(f[i]);
331 bool nonFinite = !std::isfinite(force2);
332 if (force2 >= force2Tolerance || nonFinite)
334 fprintf(fp, "step %" GMX_PRId64 " atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
335 step,
336 ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], std::sqrt(force2));
338 if (nonFinite)
340 numNonFinite++;
343 if (numNonFinite > 0)
345 /* Note that with MPI this fatal call on one rank might interrupt
346 * the printing on other ranks. But we can only avoid that with
347 * an expensive MPI barrier that we would need at each step.
349 gmx_fatal(FARGS, "At step %" GMX_PRId64 " detected non-finite forces on %ju atoms", step, numNonFinite);
353 static void post_process_forces(t_commrec *cr,
354 gmx_int64_t step,
355 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
356 gmx_localtop_t *top,
357 matrix box, rvec x[],
358 rvec f[],
359 tensor vir_force,
360 t_mdatoms *mdatoms,
361 t_graph *graph,
362 t_forcerec *fr, gmx_vsite_t *vsite,
363 int flags)
365 if (fr->bF_NoVirSum)
367 if (vsite)
369 /* Spread the mesh force on virtual sites to the other particles...
370 * This is parallelized. MPI communication is performed
371 * if the constructing atoms aren't local.
373 wallcycle_start(wcycle, ewcVSITESPREAD);
374 spread_vsite_f(vsite, x, as_rvec_array(fr->f_novirsum->data()), nullptr,
375 (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip,
376 nrnb,
377 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
378 wallcycle_stop(wcycle, ewcVSITESPREAD);
380 if (flags & GMX_FORCE_VIRIAL)
382 /* Now add the forces, this is local */
383 sum_forces(f, fr->f_novirsum);
385 if (EEL_FULL(fr->ic->eeltype))
387 /* Add the mesh contribution to the virial */
388 m_add(vir_force, fr->vir_el_recip, vir_force);
390 if (EVDW_PME(fr->ic->vdwtype))
392 /* Add the mesh contribution to the virial */
393 m_add(vir_force, fr->vir_lj_recip, vir_force);
395 if (debug)
397 pr_rvecs(debug, 0, "vir_force", vir_force, DIM);
402 if (fr->print_force >= 0)
404 print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f);
408 static void do_nb_verlet(t_forcerec *fr,
409 interaction_const_t *ic,
410 gmx_enerdata_t *enerd,
411 int flags, int ilocality,
412 int clearF,
413 gmx_int64_t step,
414 t_nrnb *nrnb,
415 gmx_wallcycle_t wcycle)
417 if (!(flags & GMX_FORCE_NONBONDED))
419 /* skip non-bonded calculation */
420 return;
423 nonbonded_verlet_t *nbv = fr->nbv;
424 nonbonded_verlet_group_t *nbvg = &nbv->grp[ilocality];
426 /* GPU kernel launch overhead is already timed separately */
427 if (fr->cutoff_scheme != ecutsVERLET)
429 gmx_incons("Invalid cut-off scheme passed!");
432 bool bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
434 if (!bUsingGpuKernels)
436 /* When dynamic pair-list pruning is requested, we need to prune
437 * at nstlistPrune steps.
439 if (nbv->listParams->useDynamicPruning &&
440 (step - nbvg->nbl_lists.outerListCreationStep) % nbv->listParams->nstlistPrune == 0)
442 /* Prune the pair-list beyond fr->ic->rlistPrune using
443 * the current coordinates of the atoms.
445 wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
446 nbnxn_kernel_cpu_prune(nbvg, nbv->nbat, fr->shift_vec, nbv->listParams->rlistInner);
447 wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
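/* Illustrative example for the pruning pass above (settings assumed, not taken
 * from this code): with an outer list rebuilt every nstlist = 100 steps and
 * nstlistPrune = 10, the outer list is re-filtered against rlistInner every 10
 * steps using the current coordinates, so pairs that have drifted beyond
 * rlistInner are left out of the list the force kernels loop over.
 */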
450 wallcycle_sub_start(wcycle, ewcsNONBONDED);
453 switch (nbvg->kernel_type)
455 case nbnxnk4x4_PlainC:
456 case nbnxnk4xN_SIMD_4xN:
457 case nbnxnk4xN_SIMD_2xNN:
458 nbnxn_kernel_cpu(nbvg,
459 nbv->nbat,
461 fr->shift_vec,
462 flags,
463 clearF,
464 fr->fshift[0],
465 enerd->grpp.ener[egCOULSR],
466 fr->bBHAM ?
467 enerd->grpp.ener[egBHAMSR] :
468 enerd->grpp.ener[egLJSR]);
469 break;
471 case nbnxnk8x8x8_GPU:
472 nbnxn_gpu_launch_kernel(nbv->gpu_nbv, nbv->nbat, flags, ilocality);
473 break;
475 case nbnxnk8x8x8_PlainC:
476 nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
477 nbv->nbat, ic,
478 fr->shift_vec,
479 flags,
480 clearF,
481 nbv->nbat->out[0].f,
482 fr->fshift[0],
483 enerd->grpp.ener[egCOULSR],
484 fr->bBHAM ?
485 enerd->grpp.ener[egBHAMSR] :
486 enerd->grpp.ener[egLJSR]);
487 break;
489 default:
490 GMX_RELEASE_ASSERT(false, "Invalid nonbonded kernel type passed!");
493 if (!bUsingGpuKernels)
495 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
498 int enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
499 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
501 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
503 else if ((!bUsingGpuKernels && nbvg->ewald_excl == ewaldexclAnalytical) ||
504 (bUsingGpuKernels && nbnxn_gpu_is_kernel_ewald_analytical(nbv->gpu_nbv)))
506 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
508 else
510 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
512 enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
513 if (flags & GMX_FORCE_ENERGY)
515 /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
516 enr_nbnxn_kernel_ljc += 1;
517 enr_nbnxn_kernel_lj += 1;
520 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
521 nbvg->nbl_lists.natpair_ljq);
522 inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
523 nbvg->nbl_lists.natpair_lj);
524 /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
525 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
526 nbvg->nbl_lists.natpair_q);
528 if (ic->vdw_modifier == eintmodFORCESWITCH)
530 /* We add up the switch cost separately */
531 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
532 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
534 if (ic->vdw_modifier == eintmodPOTSWITCH)
536 /* We add up the switch cost separately */
537 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
538 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
540 if (ic->vdwtype == evdwPME)
542 /* We add up the LJ Ewald cost separately */
543 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
544 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
548 static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
549 t_forcerec *fr,
550 rvec x[],
551 rvec f[],
552 t_mdatoms *mdatoms,
553 t_lambda *fepvals,
554 real *lambda,
555 gmx_enerdata_t *enerd,
556 int flags,
557 t_nrnb *nrnb,
558 gmx_wallcycle_t wcycle)
560 int donb_flags;
561 nb_kernel_data_t kernel_data;
562 real lam_i[efptNR];
563 real dvdl_nb[efptNR];
564 int th;
565 int i, j;
567 donb_flags = 0;
568 /* Add short-range interactions */
569 donb_flags |= GMX_NONBONDED_DO_SR;
571 /* Currently all group scheme kernels always calculate (shift-)forces */
572 if (flags & GMX_FORCE_FORCES)
574 donb_flags |= GMX_NONBONDED_DO_FORCE;
576 if (flags & GMX_FORCE_VIRIAL)
578 donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
580 if (flags & GMX_FORCE_ENERGY)
582 donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
585 kernel_data.flags = donb_flags;
586 kernel_data.lambda = lambda;
587 kernel_data.dvdl = dvdl_nb;
589 kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
590 kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
592 /* reset free energy components */
593 for (i = 0; i < efptNR; i++)
595 dvdl_nb[i] = 0;
598 assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl);
600 wallcycle_sub_start(wcycle, ewcsNONBONDED);
601 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
602 for (th = 0; th < nbl_lists->nnbl; th++)
606 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
607 x, f, fr, mdatoms, &kernel_data, nrnb);
609 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
612 if (fepvals->sc_alpha != 0)
614 enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
615 enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
617 else
619 enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
620 enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
623 /* If we do foreign lambda and we have soft-core interactions
624 * we have to recalculate the (non-linear) energy contributions.
626 if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
628 kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
629 kernel_data.lambda = lam_i;
630 kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
631 kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
632 /* Note that we add to kernel_data.dvdl, but ignore the result */
634 for (i = 0; i < enerd->n_lambda; i++)
636 for (j = 0; j < efptNR; j++)
638 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
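/* Entry i == 0 of the foreign-lambda loop re-evaluates the current lambda state
 * itself; entries i > 0 use foreign lambda i-1 from all_lambda.
 */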
640 reset_foreign_enerdata(enerd);
641 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
642 for (th = 0; th < nbl_lists->nnbl; th++)
646 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
647 x, f, fr, mdatoms, &kernel_data, nrnb);
649 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
652 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
653 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
657 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
660 gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
662 return nbv != nullptr && nbv->bUseGPU;
665 static gmx_inline void clear_rvecs_omp(int n, rvec v[])
667 int nth = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, n);
669 /* Note that we would like to avoid this conditional by putting it
670 * into the omp pragma instead, but then we still take the full
671 * omp parallel for overhead (at least with gcc5).
673 if (nth == 1)
675 for (int i = 0; i < n; i++)
677 clear_rvec(v[i]);
680 else
682 #pragma omp parallel for num_threads(nth) schedule(static)
683 for (int i = 0; i < n; i++)
685 clear_rvec(v[i]);
690 /*! \brief This routine checks if the potential energy is finite.
692 * Note that passing this check does not guarantee finite forces,
693 * since those use slightly different arithmetic. But in most cases
694 * there is just a narrow coordinate range where forces are not finite
695 * and energies are finite.
697 * \param[in] enerd The energy data; the non-bonded group energies need to be added in here before calling this routine
699 static void checkPotentialEnergyValidity(const gmx_enerdata_t *enerd)
701 if (!std::isfinite(enerd->term[F_EPOT]))
703 gmx_fatal(FARGS, "The total potential energy is %g, which is not finite. The LJ and electrostatic contributions to the energy are %g and %g, respectively. A non-finite potential energy can be caused by overlapping interactions in bonded interactions or very large or NaN coordinate values. Usually this is caused by a badly or non-equilibrated initial configuration or incorrect interactions or parameters in the topology.",
704 enerd->term[F_EPOT],
705 enerd->term[F_LJ],
706 enerd->term[F_COUL_SR]);
710 /*! \brief Compute forces and/or energies for special algorithms
712 * The intention is to collect all calls to algorithms that compute
713 * forces on local atoms only and that do not contribute to the local
714 * virial sum (but add their virial contribution separately).
715 * Eventually these should likely all become ForceProviders.
716 * Within this function the intention is to have algorithms that do
717 * global communication at the end, so global barriers within the MD loop
718 * are as close together as possible.
720 * \param[in] cr The communication record
721 * \param[in] inputrec The input record
722 * \param[in] step The current MD step
723 * \param[in] t The current time
724 * \param[in,out] wcycle Wallcycle accounting struct
725 * \param[in,out] forceProviders Pointer to a list of force providers
726 * \param[in] box The unit cell
727 * \param[in] x The coordinates
728 * \param[in] mdatoms Per atom properties
729 * \param[in] lambda Array of free-energy lambda values
730 * \param[in] forceFlags Flags that tell whether we should compute forces/energies/virial
731 * \param[in,out] force Force buffer; does not contribute to the virial
732 * \param[in,out] virial Virial buffer
733 * \param[in,out] enerd Energy buffer
734 * \param[in,out] ed Essential dynamics pointer
735 * \param[in] bNS Tells if we did neighbor searching this step, used for ED sampling
737 * \todo Remove bNS, which is used incorrectly.
738 * \todo Convert all other algorithms called here to ForceProviders.
740 static void
741 computeSpecialForces(t_commrec *cr,
742 t_inputrec *inputrec,
743 gmx_int64_t step,
744 double t,
745 gmx_wallcycle_t wcycle,
746 ForceProviders *forceProviders,
747 matrix box,
748 rvec *x,
749 t_mdatoms *mdatoms,
750 real *lambda,
751 int forceFlags,
752 PaddedRVecVector *force,
753 tensor virial,
754 gmx_enerdata_t *enerd,
755 gmx_edsam_t ed,
756 gmx_bool bNS)
758 const bool computeForces = (forceFlags & GMX_FORCE_FORCES);
760 /* NOTE: Currently all ForceProviders only provide forces.
761 * When they also provide energies, remove this conditional.
763 if (computeForces)
765 /* Collect forces from modules */
766 gmx::ArrayRef<gmx::RVec> f = *force;
768 forceProviders->calculateForces(cr, mdatoms, box, t, x, f);
771 rvec *f = as_rvec_array(force->data());
773 if (inputrec->bPull && pull_have_potential(inputrec->pull_work))
775 pull_potential_wrapper(cr, inputrec, box, x,
776 f, virial, mdatoms, enerd, lambda, t,
777 wcycle);
780 /* Add the forces from enforced rotation potentials (if any) */
781 if (inputrec->bRot)
783 wallcycle_start(wcycle, ewcROTadd);
784 enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
785 wallcycle_stop(wcycle, ewcROTadd);
788 if (ed)
790 /* Note that since init_edsam() is called after the initialization
791 * of forcerec, edsam doesn't request the noVirSum force buffer.
792 * Thus if no other algorithm (e.g. PME) requires it, the forces
793 * here will contribute to the virial.
795 do_flood(cr, inputrec, x, f, ed, box, step, bNS);
798 /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
799 if (inputrec->bIMD && computeForces)
801 IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
805 static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
806 t_inputrec *inputrec,
807 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
808 gmx_localtop_t *top,
809 gmx_groups_t gmx_unused *groups,
810 matrix box, rvec x[], history_t *hist,
811 PaddedRVecVector *force,
812 tensor vir_force,
813 t_mdatoms *mdatoms,
814 gmx_enerdata_t *enerd, t_fcdata *fcd,
815 real *lambda, t_graph *graph,
816 t_forcerec *fr, interaction_const_t *ic,
817 gmx_vsite_t *vsite, rvec mu_tot,
818 double t, gmx_edsam_t ed,
819 gmx_bool bBornRadii,
820 int flags,
821 DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
822 DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
824 int cg1, i, j;
825 double mu[2*DIM];
826 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
827 gmx_bool bDoForces, bUseGPU, bUseOrEmulGPU;
828 rvec vzero, box_diag;
829 float cycles_pme, cycles_wait_gpu;
830 nonbonded_verlet_t *nbv = fr->nbv;
832 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
833 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
834 bFillGrid = (bNS && bStateChanged);
835 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
836 bDoForces = (flags & GMX_FORCE_FORCES);
837 bUseGPU = fr->nbv->bUseGPU;
838 bUseOrEmulGPU = bUseGPU || (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes);
840 /* At a search step we need to start the first balancing region
841 * somewhere early inside the step after communication during domain
842 * decomposition (and not during the previous step as usual).
844 if (bNS &&
845 ddOpenBalanceRegion == DdOpenBalanceRegionBeforeForceComputation::yes)
847 ddOpenBalanceRegionCpu(cr->dd, DdAllowBalanceRegionReopen::yes);
850 cycles_wait_gpu = 0;
852 const int start = 0;
853 const int homenr = mdatoms->homenr;
855 clear_mat(vir_force);
857 if (DOMAINDECOMP(cr))
859 cg1 = cr->dd->ncg_tot;
861 else
863 cg1 = top->cgs.nr;
865 if (fr->n_tpi > 0)
867 cg1--;
870 if (bStateChanged)
872 update_forcerec(fr, box);
874 if (inputrecNeedMutot(inputrec))
876 /* Calculate total (local) dipole moment in a temporary common array.
877 * This makes it possible to sum them over nodes faster.
879 calc_mu(start, homenr,
880 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
881 mu, mu+DIM);
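/* mu[0..DIM-1] holds the A-state dipole and mu[DIM..2*DIM-1] the B-state dipole,
 * so a single reduction of 2*DIM doubles (the gmx_sumd call further down)
 * suffices when running in parallel.
 */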
885 if (fr->ePBC != epbcNONE)
887 /* Compute shift vectors every step,
888 * because of pressure coupling or box deformation!
890 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
892 calc_shifts(box, fr->shift_vec);
895 if (bCalcCGCM)
897 put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
898 inc_nrnb(nrnb, eNR_SHIFTX, homenr);
900 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
902 unshift_self(graph, box, x);
906 nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
907 fr->shift_vec, nbv->nbat);
909 #if GMX_MPI
910 if (!(cr->duty & DUTY_PME))
912 gmx_bool bBS;
913 matrix boxs;
915 /* Send particle coordinates to the pme nodes.
916 * Since this is only implemented for domain decomposition
917 * and domain decomposition does not use the graph,
918 * we do not need to worry about shifting.
921 wallcycle_start(wcycle, ewcPP_PMESENDX);
923 bBS = (inputrec->nwall == 2);
924 if (bBS)
926 copy_mat(box, boxs);
927 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
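/* With two walls the box sent to PME is elongated along z by wall_ewald_zfac
 * (the wall-ewald-zfac mdp option), which is intended to keep the periodic
 * images along z well separated in the reciprocal-space sum.
 */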
930 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
931 lambda[efptCOUL], lambda[efptVDW],
932 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
933 step);
935 wallcycle_stop(wcycle, ewcPP_PMESENDX);
937 #endif /* GMX_MPI */
939 /* do gridding for pair search */
940 if (bNS)
942 if (graph && bStateChanged)
944 /* Calculate intramolecular shift vectors to make molecules whole */
945 mk_mshift(fplog, graph, fr->ePBC, box, x);
948 clear_rvec(vzero);
949 box_diag[XX] = box[XX][XX];
950 box_diag[YY] = box[YY][YY];
951 box_diag[ZZ] = box[ZZ][ZZ];
953 wallcycle_start(wcycle, ewcNS);
954 if (!fr->bDomDec)
956 wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
957 nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
958 0, vzero, box_diag,
959 0, mdatoms->homenr, -1, fr->cginfo, x,
960 0, nullptr,
961 nbv->grp[eintLocal].kernel_type,
962 nbv->nbat);
963 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
965 else
967 wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
968 nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
969 fr->cginfo, x,
970 nbv->grp[eintNonlocal].kernel_type,
971 nbv->nbat);
972 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
975 nbnxn_atomdata_set(nbv->nbat, nbv->nbs, mdatoms, fr->cginfo);
976 wallcycle_stop(wcycle, ewcNS);
979 /* initialize the GPU atom data and copy shift vector */
980 if (bUseGPU)
982 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
983 wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
985 if (bNS)
987 nbnxn_gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat);
990 nbnxn_gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat);
992 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
993 wallcycle_stop(wcycle, ewcLAUNCH_GPU);
996 /* do local pair search */
997 if (bNS)
999 wallcycle_start_nocount(wcycle, ewcNS);
1000 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
1001 nbnxn_make_pairlist(nbv->nbs, nbv->nbat,
1002 &top->excls,
1003 nbv->listParams->rlistOuter,
1004 nbv->min_ci_balanced,
1005 &nbv->grp[eintLocal].nbl_lists,
1006 eintLocal,
1007 nbv->grp[eintLocal].kernel_type,
1008 nrnb);
1009 nbv->grp[eintLocal].nbl_lists.outerListCreationStep = step;
1010 if (nbv->listParams->useDynamicPruning && !bUseGPU)
1012 nbnxnPrepareListForDynamicPruning(&nbv->grp[eintLocal].nbl_lists);
1014 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
1016 if (bUseGPU)
1018 /* initialize local pair-list on the GPU */
1019 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
1020 nbv->grp[eintLocal].nbl_lists.nbl[0],
1021 eintLocal);
1023 wallcycle_stop(wcycle, ewcNS);
1025 else
1027 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1028 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1029 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x,
1030 nbv->nbat);
1031 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1032 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1035 if (bUseGPU)
1037 if (DOMAINDECOMP(cr))
1039 ddOpenBalanceRegionGpu(cr->dd);
1042 wallcycle_start(wcycle, ewcLAUNCH_GPU);
1043 wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1044 /* launch local nonbonded work on GPU */
1045 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
1046 step, nrnb, wcycle);
1047 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1048 wallcycle_stop(wcycle, ewcLAUNCH_GPU);
1051 /* Communicate coordinates and sum dipole if necessary +
1052 do non-local pair search */
1053 if (DOMAINDECOMP(cr))
1055 if (bNS)
1057 wallcycle_start_nocount(wcycle, ewcNS);
1058 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1060 nbnxn_make_pairlist(nbv->nbs, nbv->nbat,
1061 &top->excls,
1062 nbv->listParams->rlistOuter,
1063 nbv->min_ci_balanced,
1064 &nbv->grp[eintNonlocal].nbl_lists,
1065 eintNonlocal,
1066 nbv->grp[eintNonlocal].kernel_type,
1067 nrnb);
1068 nbv->grp[eintNonlocal].nbl_lists.outerListCreationStep = step;
1069 if (nbv->listParams->useDynamicPruning && !bUseGPU)
1071 nbnxnPrepareListForDynamicPruning(&nbv->grp[eintNonlocal].nbl_lists);
1073 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1075 if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_GPU)
1077 /* initialize non-local pair-list on the GPU */
1078 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
1079 nbv->grp[eintNonlocal].nbl_lists.nbl[0],
1080 eintNonlocal);
1082 wallcycle_stop(wcycle, ewcNS);
1084 else
1086 wallcycle_start(wcycle, ewcMOVEX);
1087 dd_move_x(cr->dd, box, x);
1088 wallcycle_stop(wcycle, ewcMOVEX);
1090 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1091 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1092 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x,
1093 nbv->nbat);
1094 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1095 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1098 if (bUseGPU)
1100 wallcycle_start(wcycle, ewcLAUNCH_GPU);
1101 wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1102 /* launch non-local nonbonded tasks on GPU */
1103 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
1104 step, nrnb, wcycle);
1105 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1106 wallcycle_stop(wcycle, ewcLAUNCH_GPU);
1110 if (bUseGPU)
1112 /* launch D2H copy-back F */
1113 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
1114 wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1115 if (DOMAINDECOMP(cr))
1117 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
1118 flags, eatNonlocal);
1120 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
1121 flags, eatLocal);
1122 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1123 wallcycle_stop(wcycle, ewcLAUNCH_GPU);
1126 if (bStateChanged && inputrecNeedMutot(inputrec))
1128 if (PAR(cr))
1130 gmx_sumd(2*DIM, mu, cr);
1131 ddReopenBalanceRegionCpu(cr->dd);
1134 for (i = 0; i < 2; i++)
1136 for (j = 0; j < DIM; j++)
1138 fr->mu_tot[i][j] = mu[i*DIM + j];
1142 if (fr->efep == efepNO)
1144 copy_rvec(fr->mu_tot[0], mu_tot);
1146 else
1148 for (j = 0; j < DIM; j++)
1150 mu_tot[j] =
1151 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
1152 lambda[efptCOUL]*fr->mu_tot[1][j];
1156 /* Reset energies */
1157 reset_enerdata(enerd);
1158 clear_rvecs(SHIFTS, fr->fshift);
1160 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1162 wallcycle_start(wcycle, ewcPPDURINGPME);
1163 dd_force_flop_start(cr->dd, nrnb);
1166 if (inputrec->bRot)
1168 wallcycle_start(wcycle, ewcROT);
1169 do_rotation(cr, inputrec, box, x, t, step, bNS);
1170 wallcycle_stop(wcycle, ewcROT);
1173 /* Temporary solution until all routines take PaddedRVecVector */
1174 rvec *f = as_rvec_array(force->data());
1176 /* Start the force cycle counter.
1177 * Note that a different counter is used for dynamic load balancing.
1179 wallcycle_start(wcycle, ewcFORCE);
1180 fr->f_novirsum = force;
1181 if (bDoForces)
1183 /* If we need to compute the virial, we might need a separate
1184 * force buffer for algorithms for which the virial is calculated
1185 * separately, such as PME.
1187 if (fr->bF_NoVirSum && (flags & GMX_FORCE_VIRIAL))
1189 fr->f_novirsum = fr->forceBufferNoVirialSummation;
1191 /* TODO: remove this - 1 when padding is properly implemented */
1192 clear_rvecs_omp(fr->f_novirsum->size() - 1,
1193 as_rvec_array(fr->f_novirsum->data()));
1195 /* Clear the short- and long-range forces */
1196 clear_rvecs_omp(fr->natoms_force_constr, f);
1198 clear_rvec(fr->vir_diag_posres);
1201 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1203 clear_pull_forces(inputrec->pull_work);
1206 /* We calculate the non-bonded forces, when done on the CPU, here.
1207 * We do this before calling do_force_lowlevel, because in that
1208 * function, the listed forces are calculated before PME, which
1209 * does communication. With this order, non-bonded and listed
1210 * force calculation imbalance can be balanced out by the domain
1211 * decomposition load balancing.
1214 if (!bUseOrEmulGPU)
1216 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
1217 step, nrnb, wcycle);
1220 if (fr->efep != efepNO)
1222 /* Calculate the local and non-local free energy interactions here.
1223 * Happens here on the CPU both with and without GPU.
1225 if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
1227 do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
1228 fr, x, f, mdatoms,
1229 inputrec->fepvals, lambda,
1230 enerd, flags, nrnb, wcycle);
1233 if (DOMAINDECOMP(cr) &&
1234 fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
1236 do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
1237 fr, x, f, mdatoms,
1238 inputrec->fepvals, lambda,
1239 enerd, flags, nrnb, wcycle);
1243 if (!bUseOrEmulGPU)
1245 int aloc;
1247 if (DOMAINDECOMP(cr))
1249 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
1250 step, nrnb, wcycle);
1253 if (!bUseOrEmulGPU)
1255 aloc = eintLocal;
1257 else
1259 aloc = eintNonlocal;
1262 /* Add all the non-bonded force to the normal force array.
1263 * This can be split into a local and a non-local part when overlapping
1264 * communication with calculation with domain decomposition.
1266 wallcycle_stop(wcycle, ewcFORCE);
1267 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1268 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1269 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->nbat, f);
1270 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1271 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1272 wallcycle_start_nocount(wcycle, ewcFORCE);
1274 /* if there are multiple fshift output buffers reduce them */
1275 if ((flags & GMX_FORCE_VIRIAL) &&
1276 nbv->grp[aloc].nbl_lists.nnbl > 1)
1278 /* This is not in a subcounter because it takes a
1279 negligible and constant-sized amount of time */
1280 nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat,
1281 fr->fshift);
1285 /* update QMMMrec, if necessary */
1286 if (fr->bQMMM)
1288 update_QMMMrec(cr, fr, x, mdatoms, box);
1291 /* Compute the bonded and non-bonded energies and optionally forces */
1292 do_force_lowlevel(fr, inputrec, &(top->idef),
1293 cr, nrnb, wcycle, mdatoms,
1294 x, hist, f, enerd, fcd, top, fr->born,
1295 bBornRadii, box,
1296 inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
1297 flags, &cycles_pme);
1299 wallcycle_stop(wcycle, ewcFORCE);
1301 computeSpecialForces(cr, inputrec, step, t, wcycle,
1302 fr->forceProviders, box, x, mdatoms, lambda,
1303 flags, fr->f_novirsum, vir_force, enerd,
1304 ed, bNS);
1306 if (bUseOrEmulGPU)
1308 /* wait for non-local forces (or calculate in emulation mode) */
1309 if (DOMAINDECOMP(cr))
1311 if (bUseGPU)
1313 wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
1314 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1315 flags, eatNonlocal,
1316 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1317 fr->fshift);
1318 cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
1320 else
1322 wallcycle_start_nocount(wcycle, ewcFORCE);
1323 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
1324 step, nrnb, wcycle);
1325 wallcycle_stop(wcycle, ewcFORCE);
1327 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1328 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1329 /* skip the reduction if there was no non-local work to do */
1330 if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
1332 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
1333 nbv->nbat, f);
1335 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1336 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1340 if (DOMAINDECOMP(cr))
1342 /* We are done with the CPU compute.
1343 * We will now communicate the non-local forces.
1344 * If we use a GPU this will overlap with GPU work, so in that case
1345 * we do not close the DD force balancing region here.
1347 if (ddCloseBalanceRegion == DdCloseBalanceRegionAfterForceComputation::yes)
1349 ddCloseBalanceRegionCpu(cr->dd);
1351 if (bDoForces)
1353 wallcycle_start(wcycle, ewcMOVEF);
1354 dd_move_f(cr->dd, f, fr->fshift);
1355 wallcycle_stop(wcycle, ewcMOVEF);
1359 if (bUseOrEmulGPU)
1361 /* wait for local forces (or calculate in emulation mode) */
1362 if (bUseGPU)
1364 /* Measured overhead on CUDA and OpenCL with(out) GPU sharing
1365 * is between 0.5 and 1.5 Mcycles. So 2 MCycles is an overestimate,
1366 * but even with a step of 0.1 ms the difference is less than 1%
1367 * of the step time.
1369 const float gpuWaitApiOverheadMargin = 2e6f; /* cycles */
1371 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
1372 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1373 flags, eatLocal,
1374 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1375 fr->fshift);
1376 float cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
1378 if (ddCloseBalanceRegion == DdCloseBalanceRegionAfterForceComputation::yes)
1380 DdBalanceRegionWaitedForGpu waitedForGpu = DdBalanceRegionWaitedForGpu::yes;
1381 if (bDoForces && cycles_tmp <= gpuWaitApiOverheadMargin)
1383 /* We measured few cycles, it could be that the kernel
1384 * and transfer finished earlier and there was no actual
1385 * wait time, only API call overhead.
1386 * Then the actual time could be anywhere between 0 and
1387 * cycles_wait_est. We will use half of cycles_wait_est.
1389 waitedForGpu = DdBalanceRegionWaitedForGpu::no;
1391 ddCloseBalanceRegionGpu(cr->dd, cycles_wait_gpu, waitedForGpu);
1394 /* now clear the GPU outputs while we finish the step on the CPU */
1395 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
1396 wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1397 nbnxn_gpu_clear_outputs(nbv->gpu_nbv, flags);
1399 /* Is dynamic pair-list pruning activated? */
1400 if (nbv->listParams->useDynamicPruning)
1402 /* We should not launch the rolling pruning kernel at a search
1403 * step or just before search steps, since that's useless.
1404 * Without domain decomposition we prune at even steps.
1405 * With domain decomposition we alternate local and non-local
1406 * pruning at even and odd steps.
1408 int numRollingParts = nbv->listParams->numRollingParts;
1409 GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
1410 int stepWithCurrentList = step - nbv->grp[eintLocal].nbl_lists.outerListCreationStep;
1411 bool stepIsEven = ((stepWithCurrentList & 1) == 0);
1412 if (stepWithCurrentList > 0 &&
1413 stepWithCurrentList < inputrec->nstlist - 1 &&
1414 (stepIsEven || DOMAINDECOMP(cr)))
1416 nbnxn_gpu_launch_kernel_pruneonly(fr->nbv->gpu_nbv,
1417 stepIsEven ? eintLocal : eintNonlocal,
1418 numRollingParts);
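/* Illustrative example for the rolling prune launch above (assumed settings):
 * with nstlistPrune = 10 the assertion requires numRollingParts = 5. With domain
 * decomposition the local list is then pruned in 5 chunks on the even steps after
 * list creation and the non-local list in 5 chunks on the odd steps, so each list
 * is pruned once per nstlistPrune interval without extra work at (or right
 * before) search steps.
 */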
1421 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
1422 wallcycle_stop(wcycle, ewcLAUNCH_GPU);
1424 else
1426 wallcycle_start_nocount(wcycle, ewcFORCE);
1427 do_nb_verlet(fr, ic, enerd, flags, eintLocal,
1428 DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
1429 step, nrnb, wcycle);
1430 wallcycle_stop(wcycle, ewcFORCE);
1432 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1433 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1434 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
1435 nbv->nbat, f);
1436 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1437 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1440 if (DOMAINDECOMP(cr))
1442 dd_force_flop_stop(cr->dd, nrnb);
1445 if (bDoForces)
1447 /* If we have NoVirSum forces, but we do not calculate the virial,
1448 * we sum fr->f_novirsum=f later.
1450 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1452 wallcycle_start(wcycle, ewcVSITESPREAD);
1453 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, nullptr, nrnb,
1454 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1455 wallcycle_stop(wcycle, ewcVSITESPREAD);
1458 if (flags & GMX_FORCE_VIRIAL)
1460 /* Calculation of the virial must be done after vsites! */
1461 calc_virial(0, mdatoms->homenr, x, f,
1462 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1466 if (PAR(cr) && !(cr->duty & DUTY_PME))
1468 /* In case of node-splitting, the PP nodes receive the long-range
1469 * forces, virial and energy from the PME nodes here.
1471 pme_receive_force_ener(cr, wcycle, enerd, fr);
1474 if (bDoForces)
1476 post_process_forces(cr, step, nrnb, wcycle,
1477 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1478 flags);
1481 if (flags & GMX_FORCE_ENERGY)
1483 /* Sum the potential energy terms from group contributions */
1484 sum_epot(&(enerd->grpp), enerd->term);
1486 if (!EI_TPI(inputrec->eI))
1488 checkPotentialEnergyValidity(enerd);
1493 static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
1494 t_inputrec *inputrec,
1495 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1496 gmx_localtop_t *top,
1497 gmx_groups_t *groups,
1498 matrix box, rvec x[], history_t *hist,
1499 PaddedRVecVector *force,
1500 tensor vir_force,
1501 t_mdatoms *mdatoms,
1502 gmx_enerdata_t *enerd, t_fcdata *fcd,
1503 real *lambda, t_graph *graph,
1504 t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
1505 double t, gmx_edsam_t ed,
1506 gmx_bool bBornRadii,
1507 int flags,
1508 DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
1509 DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
1511 int cg0, cg1, i, j;
1512 double mu[2*DIM];
1513 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
1514 gmx_bool bDoForces;
1515 float cycles_pme;
1517 const int start = 0;
1518 const int homenr = mdatoms->homenr;
1520 clear_mat(vir_force);
1522 cg0 = 0;
1523 if (DOMAINDECOMP(cr))
1525 cg1 = cr->dd->ncg_tot;
1527 else
1529 cg1 = top->cgs.nr;
1531 if (fr->n_tpi > 0)
1533 cg1--;
1536 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
1537 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
1538 /* Should we perform the long-range nonbonded evaluation inside the neighbor searching? */
1539 bFillGrid = (bNS && bStateChanged);
1540 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
1541 bDoForces = (flags & GMX_FORCE_FORCES);
1543 if (bStateChanged)
1545 update_forcerec(fr, box);
1547 if (inputrecNeedMutot(inputrec))
1549 /* Calculate total (local) dipole moment in a temporary common array.
1550 * This makes it possible to sum them over nodes faster.
1552 calc_mu(start, homenr,
1553 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
1554 mu, mu+DIM);
1558 if (fr->ePBC != epbcNONE)
1560 /* Compute shift vectors every step,
1561 * because of pressure coupling or box deformation!
1563 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
1565 calc_shifts(box, fr->shift_vec);
1568 if (bCalcCGCM)
1570 put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
1571 &(top->cgs), x, fr->cg_cm);
1572 inc_nrnb(nrnb, eNR_CGCM, homenr);
1573 inc_nrnb(nrnb, eNR_RESETX, cg1-cg0);
1575 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
1577 unshift_self(graph, box, x);
1580 else if (bCalcCGCM)
1582 calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
1583 inc_nrnb(nrnb, eNR_CGCM, homenr);
1586 if (bCalcCGCM && gmx_debug_at)
1588 pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
1591 #if GMX_MPI
1592 if (!(cr->duty & DUTY_PME))
1594 gmx_bool bBS;
1595 matrix boxs;
1597 /* Send particle coordinates to the pme nodes.
1598 * Since this is only implemented for domain decomposition
1599 * and domain decomposition does not use the graph,
1600 * we do not need to worry about shifting.
1603 wallcycle_start(wcycle, ewcPP_PMESENDX);
1605 bBS = (inputrec->nwall == 2);
1606 if (bBS)
1608 copy_mat(box, boxs);
1609 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
1612 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
1613 lambda[efptCOUL], lambda[efptVDW],
1614 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
1615 step);
1617 wallcycle_stop(wcycle, ewcPP_PMESENDX);
1619 #endif /* GMX_MPI */
1621 /* Communicate coordinates and sum dipole if necessary */
1622 if (DOMAINDECOMP(cr))
1624 wallcycle_start(wcycle, ewcMOVEX);
1625 dd_move_x(cr->dd, box, x);
1626 wallcycle_stop(wcycle, ewcMOVEX);
1627 /* No GPU support, no move_x overlap, so reopen the balance region here */
1628 if (ddOpenBalanceRegion == DdOpenBalanceRegionBeforeForceComputation::yes)
1630 ddReopenBalanceRegionCpu(cr->dd);
1634 if (inputrecNeedMutot(inputrec))
1636 if (bStateChanged)
1638 if (PAR(cr))
1640 gmx_sumd(2*DIM, mu, cr);
1641 ddReopenBalanceRegionCpu(cr->dd);
1643 for (i = 0; i < 2; i++)
1645 for (j = 0; j < DIM; j++)
1647 fr->mu_tot[i][j] = mu[i*DIM + j];
1651 if (fr->efep == efepNO)
1653 copy_rvec(fr->mu_tot[0], mu_tot);
1655 else
1657 for (j = 0; j < DIM; j++)
1659 mu_tot[j] =
1660 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
1665 /* Reset energies */
1666 reset_enerdata(enerd);
1667 clear_rvecs(SHIFTS, fr->fshift);
1669 if (bNS)
1671 wallcycle_start(wcycle, ewcNS);
1673 if (graph && bStateChanged)
1675 /* Calculate intramolecular shift vectors to make molecules whole */
1676 mk_mshift(fplog, graph, fr->ePBC, box, x);
1679 /* Do the actual neighbour searching */
1680 ns(fplog, fr, box,
1681 groups, top, mdatoms,
1682 cr, nrnb, bFillGrid);
1684 wallcycle_stop(wcycle, ewcNS);
1687 if (inputrec->implicit_solvent && bNS)
1689 make_gb_nblist(cr, inputrec->gb_algorithm,
1690 x, box, fr, &top->idef, graph, fr->born);
1693 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1695 wallcycle_start(wcycle, ewcPPDURINGPME);
1696 dd_force_flop_start(cr->dd, nrnb);
1699 if (inputrec->bRot)
1701 wallcycle_start(wcycle, ewcROT);
1702 do_rotation(cr, inputrec, box, x, t, step, bNS);
1703 wallcycle_stop(wcycle, ewcROT);
1706 /* Temporary solution until all routines take PaddedRVecVector */
1707 rvec *f = as_rvec_array(force->data());
1709 /* Start the force cycle counter.
1710 * Note that a different counter is used for dynamic load balancing.
1712 wallcycle_start(wcycle, ewcFORCE);
1714 fr->f_novirsum = force;
1715 if (bDoForces)
1717 /* If we need to compute the virial, we might need a separate
1718 * force buffer for algorithms for which the virial is calculated
1719 * separately, such as PME.
1721 if (fr->bF_NoVirSum && (flags & GMX_FORCE_VIRIAL))
1723 fr->f_novirsum = fr->forceBufferNoVirialSummation;
1725 /* TODO: remove this - 1 when padding is properly implemented */
1726 clear_rvecs(fr->f_novirsum->size() - 1,
1727 as_rvec_array(fr->f_novirsum->data()));
1730 /* Clear the short- and long-range forces */
1731 clear_rvecs(fr->natoms_force_constr, f);
1733 clear_rvec(fr->vir_diag_posres);
1735 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1737 clear_pull_forces(inputrec->pull_work);
1740 /* update QMMMrec, if necessary */
1741 if (fr->bQMMM)
1743 update_QMMMrec(cr, fr, x, mdatoms, box);
1746 /* Compute the bonded and non-bonded energies and optionally forces */
1747 do_force_lowlevel(fr, inputrec, &(top->idef),
1748 cr, nrnb, wcycle, mdatoms,
1749 x, hist, f, enerd, fcd, top, fr->born,
1750 bBornRadii, box,
1751 inputrec->fepvals, lambda,
1752 graph, &(top->excls), fr->mu_tot,
1753 flags,
1754 &cycles_pme);
1756 wallcycle_stop(wcycle, ewcFORCE);
1758 if (DOMAINDECOMP(cr))
1760 dd_force_flop_stop(cr->dd, nrnb);
1762 if (ddCloseBalanceRegion == DdCloseBalanceRegionAfterForceComputation::yes)
1764 ddCloseBalanceRegionCpu(cr->dd);
1768 computeSpecialForces(cr, inputrec, step, t, wcycle,
1769 fr->forceProviders, box, x, mdatoms, lambda,
1770 flags, fr->f_novirsum, vir_force, enerd,
1771 ed, bNS);
1773 if (bDoForces)
1775 /* Communicate the forces */
1776 if (DOMAINDECOMP(cr))
1778 wallcycle_start(wcycle, ewcMOVEF);
1779 dd_move_f(cr->dd, f, fr->fshift);
1780 /* Do we need to communicate the separate force array
1781 * for terms that do not contribute to the single sum virial?
1782 * Position restraints and electric fields do not introduce
1783 * inter-cg forces, only full electrostatics methods do.
1784 * When we do not calculate the virial, fr->f_novirsum = f,
1785 * so we have already communicated these forces.
1787 if (EEL_FULL(fr->ic->eeltype) && cr->dd->n_intercg_excl &&
1788 (flags & GMX_FORCE_VIRIAL))
1790 dd_move_f(cr->dd, as_rvec_array(fr->f_novirsum->data()), nullptr);
1792 wallcycle_stop(wcycle, ewcMOVEF);
1795 /* If we have NoVirSum forces, but we do not calculate the virial,
1796 * we sum fr->f_novirsum=f later.
1798 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1800 wallcycle_start(wcycle, ewcVSITESPREAD);
1801 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, nullptr, nrnb,
1802 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1803 wallcycle_stop(wcycle, ewcVSITESPREAD);
1806 if (flags & GMX_FORCE_VIRIAL)
1808 /* Calculation of the virial must be done after vsites! */
1809 calc_virial(0, mdatoms->homenr, x, f,
1810 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1814 if (PAR(cr) && !(cr->duty & DUTY_PME))
1816 /* In case of node-splitting, the PP nodes receive the long-range
1817 * forces, virial and energy from the PME nodes here.
1819 pme_receive_force_ener(cr, wcycle, enerd, fr);
1822 if (bDoForces)
1824 post_process_forces(cr, step, nrnb, wcycle,
1825 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1826 flags);
1829 if (flags & GMX_FORCE_ENERGY)
1831 /* Sum the potential energy terms from group contributions */
1832 sum_epot(&(enerd->grpp), enerd->term);
1834 if (!EI_TPI(inputrec->eI))
1836 checkPotentialEnergyValidity(enerd);
1842 void do_force(FILE *fplog, t_commrec *cr,
1843 t_inputrec *inputrec,
1844 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1845 gmx_localtop_t *top,
1846 gmx_groups_t *groups,
1847 matrix box, PaddedRVecVector *coordinates, history_t *hist,
1848 PaddedRVecVector *force,
1849 tensor vir_force,
1850 t_mdatoms *mdatoms,
1851 gmx_enerdata_t *enerd, t_fcdata *fcd,
1852 gmx::ArrayRef<real> lambda, t_graph *graph,
1853 t_forcerec *fr,
1854 gmx_vsite_t *vsite, rvec mu_tot,
1855 double t, gmx_edsam_t ed,
1856 gmx_bool bBornRadii,
1857 int flags,
1858 DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion,
1859 DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion)
1861 /* modify force flag if not doing nonbonded */
1862 if (!fr->bNonbonded)
1864 flags &= ~GMX_FORCE_NONBONDED;
1867 GMX_ASSERT(coordinates->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
1868 fr->natoms_force == 0, "We might need 1 element extra for SIMD");
1869 GMX_ASSERT(force->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
1870 fr->natoms_force == 0, "We might need 1 element extra for SIMD");
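/* The "+ 1" matches the padding convention noted in do_constrain_first below:
 * unaligned 4-wide SIMD loads of 3-element rvecs may read one element past the
 * last atom, so one extra rvec of padding must be available.
 */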
1872 rvec *x = as_rvec_array(coordinates->data());
1874 switch (inputrec->cutoff_scheme)
1876 case ecutsVERLET:
1877 do_force_cutsVERLET(fplog, cr, inputrec,
1878 step, nrnb, wcycle,
1879 top,
1880 groups,
1881 box, x, hist,
1882 force, vir_force,
1883 mdatoms,
1884 enerd, fcd,
1885 lambda.data(), graph,
1886 fr, fr->ic,
1887 vsite, mu_tot,
1888 t, ed,
1889 bBornRadii,
1890 flags,
1891 ddOpenBalanceRegion,
1892 ddCloseBalanceRegion);
1893 break;
1894 case ecutsGROUP:
1895 do_force_cutsGROUP(fplog, cr, inputrec,
1896 step, nrnb, wcycle,
1897 top,
1898 groups,
1899 box, x, hist,
1900 force, vir_force,
1901 mdatoms,
1902 enerd, fcd,
1903 lambda.data(), graph,
1904 fr, vsite, mu_tot,
1905 t, ed,
1906 bBornRadii,
1907 flags,
1908 ddOpenBalanceRegion,
1909 ddCloseBalanceRegion);
1910 break;
1911 default:
1912 gmx_incons("Invalid cut-off scheme passed!");
1915 /* In case we don't have constraints and are using GPUs, the next balancing
1916 * region starts here.
1917 * Some "special" work at the end of do_force_cuts?, such as vsite spread,
1918 * virial calculation and COM pulling, is thus not included in
1919 * the balance timing, which is ok as most tasks do communication.
1921 if (ddOpenBalanceRegion == DdOpenBalanceRegionBeforeForceComputation::yes)
1923 ddOpenBalanceRegionCpu(cr->dd, DdAllowBalanceRegionReopen::no);
1928 void do_constrain_first(FILE *fplog, gmx_constr_t constr,
1929 t_inputrec *ir, t_mdatoms *md,
1930 t_state *state, t_commrec *cr, t_nrnb *nrnb,
1931 t_forcerec *fr, gmx_localtop_t *top)
1933 int i, m, start, end;
1934 gmx_int64_t step;
1935 real dt = ir->delta_t;
1936 real dvdl_dum;
1937 rvec *savex;
1939 /* We need to allocate one element extra, since we might use
1940 * (unaligned) 4-wide SIMD loads to access rvec entries.
1942 snew(savex, state->natoms + 1);
1944 start = 0;
1945 end = md->homenr;
1947 if (debug)
1949 fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
1950 start, md->homenr, end);
1952 /* Do a first constrain to reset particles... */
1953 step = ir->init_step;
1954 if (fplog)
1956 char buf[STEPSTRSIZE];
1957 fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
1958 gmx_step_str(step, buf));
1960 dvdl_dum = 0;
1962 /* constrain the current position */
1963 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
1964 ir, cr, step, 0, 1.0, md,
1965 as_rvec_array(state->x.data()), as_rvec_array(state->x.data()), nullptr,
1966 fr->bMolPBC, state->box,
1967 state->lambda[efptBONDED], &dvdl_dum,
1968 nullptr, nullptr, nrnb, econqCoord);
1969 if (EI_VV(ir->eI))
1971 /* constrain the initial velocity, and save it */
1972 /* also may be useful if we need the ekin from the halfstep for velocity verlet */
1973 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
1974 ir, cr, step, 0, 1.0, md,
1975 as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), as_rvec_array(state->v.data()),
1976 fr->bMolPBC, state->box,
1977 state->lambda[efptBONDED], &dvdl_dum,
1978 nullptr, nullptr, nrnb, econqVeloc);
1980 /* constrain the initial velocities at t-dt/2 */
1981 if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
1983 for (i = start; (i < end); i++)
1985 for (m = 0; (m < DIM); m++)
1987 /* Reverse the velocity */
1988 state->v[i][m] = -state->v[i][m];
1989 /* Store the position at t-dt in buf */
1990 savex[i][m] = state->x[i][m] + dt*state->v[i][m];
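/* With the velocity sign flipped, x + dt*v is the unconstrained position at
 * t = -dt. Constraining that position below, with the t = 0 coordinates as
 * reference and the velocities passed to constrain(), yields coordinates at -dt
 * and constraint-corrected velocities for the leap-frog half step; the
 * velocities are flipped back afterwards.
 */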
1993 /* Shake the positions at t=-dt with the positions at t=0
1994 * as reference coordinates.
1996 if (fplog)
1998 char buf[STEPSTRSIZE];
1999 fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
2000 gmx_step_str(step, buf));
2002 dvdl_dum = 0;
2003 constrain(nullptr, TRUE, FALSE, constr, &(top->idef),
2004 ir, cr, step, -1, 1.0, md,
2005 as_rvec_array(state->x.data()), savex, nullptr,
2006 fr->bMolPBC, state->box,
2007 state->lambda[efptBONDED], &dvdl_dum,
2008 as_rvec_array(state->v.data()), nullptr, nrnb, econqCoord);
2010 for (i = start; i < end; i++)
2012 for (m = 0; m < DIM; m++)
2014 /* Re-reverse the velocities */
2015 state->v[i][m] = -state->v[i][m];
2019 sfree(savex);
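/* Editorial note, not part of the original source: as the code above reads,
 * the first loop builds trial coordinates for t = -dt by reversing the
 * velocities and taking savex = x(0) + dt*(-v) = x(0) - dt*v, then constrains
 * savex with x(0) as the reference (which also lets constrain() correct the
 * velocities), and the final loop restores the original sign of v. A minimal
 * sketch of the same idea for one unconstrained degree of freedom:
 *
 *     v     = -v;           // reverse the velocity
 *     savex =  x + dt*v;    // equals x - dt*v_original, i.e. x at t = -dt
 *     // ... constrain savex against x, projecting the change onto v ...
 *     v     = -v;           // restore the velocity
 */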
2023 static void
2024 integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
2025 double *enerout, double *virout)
2027 double enersum, virsum;
2028 double invscale, invscale2, invscale3;
2029 double r, ea, eb, ec, pa, pb, pc, pd;
2030 double y0, f, g, h;
2031 int ri, offset;
2032 double tabfactor;
2034 invscale = 1.0/scale;
2035 invscale2 = invscale*invscale;
2036 invscale3 = invscale*invscale2;
2038 /* Following summation derived from cubic spline definition,
2039 * Numerical Recipes in C, second edition, p. 113-116. Exact for
2040 * the cubic spline. We first calculate the negative of the
2041 * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
2042 * add the more standard, abrupt cutoff correction to that result,
2043 * yielding the long-range correction for a switched function. We
2044 * perform both the pressure and energy loops at the same time for
2045 * simplicity, as the computational cost is low. */
2047 if (offstart == 0)
2049 /* Since the dispersion table has been scaled down by a factor of
2050 * 6.0 and the repulsion by a factor of 12.0 to compensate for the
2051 * c6/c12 parameters inside nbfp[] being scaled up (to save
2052 * flops in kernels), we need to correct for this.
2054 tabfactor = 6.0;
2056 else
2058 tabfactor = 12.0;
2061 enersum = 0.0;
2062 virsum = 0.0;
2063 for (ri = rstart; ri < rend; ++ri)
2065 r = ri*invscale;
2066 ea = invscale3;
2067 eb = 2.0*invscale2*r;
2068 ec = invscale*r*r;
2070 pa = invscale3;
2071 pb = 3.0*invscale2*r;
2072 pc = 3.0*invscale*r*r;
2073 pd = r*r*r;
2075 /* this "8" is from the packing in the vdwtab array - perhaps
2076 should be defined? */
2078 offset = 8*ri + offstart;
2079 y0 = vdwtab[offset];
2080 f = vdwtab[offset+1];
2081 g = vdwtab[offset+2];
2082 h = vdwtab[offset+3];
2084 enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
2085 virsum += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
2087 *enerout = 4.0*M_PI*enersum*tabfactor;
2088 *virout = 4.0*M_PI*virsum*tabfactor;
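/* Editorial note, not part of the original source: within each table segment
 * the tabulated potential is a cubic spline in eps = (r' - r)*scale,
 * V(r') = y0 + f*eps + g*eps^2 + h*eps^3, so the loop above appears to
 * accumulate integral_0^{1/scale} (r + x)^2 V(r + x) dx term by term. For
 * example, the weight multiplying f is
 *
 *     integral_0^{1/scale} (r + x)^2 (scale*x) dx = ea/4 + eb/3 + ec/2
 *
 * with ea = invscale^3, eb = 2*r*invscale^2 and ec = r^2*invscale, which is
 * exactly the combination used for enersum; the pa..pd weights play the same
 * role for the r'^3 * dV/dr' integrand of the virial. The trailing factors
 * 4*pi and tabfactor (6 or 12) undo the c6/c12 prescaling described above.
 */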
2091 void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr)
2093 double eners[2], virs[2], enersum, virsum;
2094 double r0, rc3, rc9;
2095 int ri0, ri1, i;
2096 real scale, *vdwtab;
2098 fr->enershiftsix = 0;
2099 fr->enershifttwelve = 0;
2100 fr->enerdiffsix = 0;
2101 fr->enerdifftwelve = 0;
2102 fr->virdiffsix = 0;
2103 fr->virdifftwelve = 0;
2105 const interaction_const_t *ic = fr->ic;
2107 if (eDispCorr != edispcNO)
2109 for (i = 0; i < 2; i++)
2111 eners[i] = 0;
2112 virs[i] = 0;
2114 if ((ic->vdw_modifier == eintmodPOTSHIFT) ||
2115 (ic->vdw_modifier == eintmodPOTSWITCH) ||
2116 (ic->vdw_modifier == eintmodFORCESWITCH) ||
2117 (ic->vdwtype == evdwSHIFT) ||
2118 (ic->vdwtype == evdwSWITCH))
2120 if (((ic->vdw_modifier == eintmodPOTSWITCH) ||
2121 (ic->vdw_modifier == eintmodFORCESWITCH) ||
2122 (ic->vdwtype == evdwSWITCH)) && ic->rvdw_switch == 0)
2124 gmx_fatal(FARGS,
2125 "With dispersion correction rvdw-switch can not be zero "
2126 "for vdw-type = %s", evdw_names[ic->vdwtype]);
2129 /* TODO This code depends on the logic in tables.c that
2130 constructs the table layout, which should be made
2131 explicit in future cleanup. */
2132 GMX_ASSERT(fr->dispersionCorrectionTable->interaction == GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
2133 "Dispersion-correction code needs a table with both repulsion and dispersion terms");
2134 scale = fr->dispersionCorrectionTable->scale;
2135 vdwtab = fr->dispersionCorrectionTable->data;
2137 /* Round the cut-offs to exact table values for precision */
2138 ri0 = static_cast<int>(floor(ic->rvdw_switch*scale));
2139 ri1 = static_cast<int>(ceil(ic->rvdw*scale));
2141 /* The code below has some support for handling force-switching, i.e.
2142 * when the force (instead of potential) is switched over a limited
2143 * region. This leads to a constant shift in the potential inside the
2144 * switching region, which we can handle by adding a constant energy
2145 * term in the force-switch case just like when we do potential-shift.
2147 * For now this is not enabled, but to keep the functionality in the
2148 * code we check separately for switch and shift. When we do force-switch
2149 * the shifting point is rvdw_switch, while it is the cutoff when we
2150 * have a classical potential-shift.
2152 * For a pure potential-shift the potential has a constant shift
2153 * all the way out to the cutoff, and that is it. For other forms
2154 * we need to calculate the constant shift up to the point where we
2155 * start modifying the potential.
2157 ri0 = (ic->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0;
2159 r0 = ri0/scale;
2160 rc3 = r0*r0*r0;
2161 rc9 = rc3*rc3*rc3;
2163 if ((ic->vdw_modifier == eintmodFORCESWITCH) ||
2164 (ic->vdwtype == evdwSHIFT))
2166 /* Determine the constant energy shift below rvdw_switch.
2167 * Table has a scale factor since we have scaled it down to compensate
2168 * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
2170 fr->enershiftsix = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
2171 fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
2173 else if (ic->vdw_modifier == eintmodPOTSHIFT)
2175 fr->enershiftsix = (real)(-1.0/(rc3*rc3));
2176 fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
2179 /* Add the constant part from 0 to rvdw_switch.
2180 * This integration from 0 to rvdw_switch overcounts the number
2181 * of interactions by 1, as it also counts the self interaction.
2182 * We will correct for this later.
2184 eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
2185 eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
2187 /* Calculate the contribution in the range [r0,r1] where we
2188 * modify the potential. For a pure potential-shift modifier we will
2189 * have ri0==ri1, and there will not be any contribution here.
2191 for (i = 0; i < 2; i++)
2193 enersum = 0;
2194 virsum = 0;
2195 integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
2196 eners[i] -= enersum;
2197 virs[i] -= virsum;
2200 /* Alright: Above we compensated by REMOVING the parts outside r0
2201 * corresponding to the ideal VdW 1/r6 and 1/r12 potentials.
2203 * Regardless of whether r0 is the point where we start switching,
2204 * or the cutoff where we calculated the constant shift, we include
2205 * all the parts we are missing out to infinity from r0 by
2206 * calculating the analytical dispersion correction.
2208 eners[0] += -4.0*M_PI/(3.0*rc3);
2209 eners[1] += 4.0*M_PI/(9.0*rc9);
2210 virs[0] += 8.0*M_PI/rc3;
2211 virs[1] += -16.0*M_PI/(3.0*rc9);
2213 else if (ic->vdwtype == evdwCUT ||
2214 EVDW_PME(ic->vdwtype) ||
2215 ic->vdwtype == evdwUSER)
2217 if (ic->vdwtype == evdwUSER && fplog)
2219 fprintf(fplog,
2220 "WARNING: using dispersion correction with user tables\n");
2223 /* Note that with LJ-PME, the dispersion correction is multiplied
2224 * by the difference between the actual C6 and the value of C6
2225 * that the combination rule would produce.
2226 * This means the normal energy and virial difference formulas
2227 * can be used here.
2230 rc3 = ic->rvdw*ic->rvdw*ic->rvdw;
2231 rc9 = rc3*rc3*rc3;
2232 /* Contribution beyond the cut-off */
2233 eners[0] += -4.0*M_PI/(3.0*rc3);
2234 eners[1] += 4.0*M_PI/(9.0*rc9);
2235 if (ic->vdw_modifier == eintmodPOTSHIFT)
2237 /* Contribution within the cut-off */
2238 eners[0] += -4.0*M_PI/(3.0*rc3);
2239 eners[1] += 4.0*M_PI/(3.0*rc9);
2241 /* Contribution beyond the cut-off */
2242 virs[0] += 8.0*M_PI/rc3;
2243 virs[1] += -16.0*M_PI/(3.0*rc9);
2245 else
2247 gmx_fatal(FARGS,
2248 "Dispersion correction is not implemented for vdw-type = %s",
2249 evdw_names[ic->vdwtype]);
2252 /* When we deprecate the group kernels the code below can go too */
2253 if (ic->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
2255 /* Calculate self-interaction coefficient (assuming that
2256 * the reciprocal-space contribution is constant in the
2257 * region that contributes to the self-interaction).
2259 fr->enershiftsix = gmx::power6(ic->ewaldcoeff_lj) / 6.0;
2261 eners[0] += -gmx::power3(std::sqrt(M_PI)*ic->ewaldcoeff_lj)/3.0;
2262 virs[0] += gmx::power3(std::sqrt(M_PI)*ic->ewaldcoeff_lj);
2265 fr->enerdiffsix = eners[0];
2266 fr->enerdifftwelve = eners[1];
2267 /* The 0.5 is due to the Gromacs definition of the virial */
2268 fr->virdiffsix = 0.5*virs[0];
2269 fr->virdifftwelve = 0.5*virs[1];
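/* Editorial note, not part of the original source: the analytical tail terms
 * added above follow from integrating the plain r^-6 and r^-12 forms beyond
 * r0 with g(r) = 1 (the actual c6/c12 averages are applied later in
 * calc_dispcorr):
 *
 *     energy:  -integral_{r0}^{inf} 4*pi*r^2 * r^-6  dr = -4*pi/(3*r0^3)
 *               integral_{r0}^{inf} 4*pi*r^2 * r^-12 dr =  4*pi/(9*r0^9)
 *     virial:   integral_{r0}^{inf} 4*pi*r^2 * (r*dV/dr) dr
 *                 =  8*pi/r0^3      for the dispersion term
 *                 = -16*pi/(3*r0^9) for the repulsion term
 *
 * and the potential-shift contribution inside the cut-off is just the shift
 * constant times the sphere volume, e.g. (1/rc^12)*(4*pi*rc^3/3)
 * = 4*pi/(3*rc^9) for the repulsive part. The 0.5 applied to virs[] reflects
 * the GROMACS virial convention noted in the comment above.
 */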
2273 void calc_dispcorr(t_inputrec *ir, t_forcerec *fr,
2274 matrix box, real lambda, tensor pres, tensor virial,
2275 real *prescorr, real *enercorr, real *dvdlcorr)
2277 gmx_bool bCorrAll, bCorrPres;
2278 real dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
2279 int m;
2281 *prescorr = 0;
2282 *enercorr = 0;
2283 *dvdlcorr = 0;
2285 clear_mat(virial);
2286 clear_mat(pres);
2288 if (ir->eDispCorr != edispcNO)
2290 bCorrAll = (ir->eDispCorr == edispcAllEner ||
2291 ir->eDispCorr == edispcAllEnerPres);
2292 bCorrPres = (ir->eDispCorr == edispcEnerPres ||
2293 ir->eDispCorr == edispcAllEnerPres);
2295 invvol = 1/det(box);
2296 if (fr->n_tpi)
2298 /* Only correct for the interactions with the inserted molecule */
2299 dens = (fr->numAtomsForDispersionCorrection - fr->n_tpi)*invvol;
2300 ninter = fr->n_tpi;
2302 else
2304 dens = fr->numAtomsForDispersionCorrection*invvol;
2305 ninter = 0.5*fr->numAtomsForDispersionCorrection;
2308 if (ir->efep == efepNO)
2310 avcsix = fr->avcsix[0];
2311 avctwelve = fr->avctwelve[0];
2313 else
2315 avcsix = (1 - lambda)*fr->avcsix[0] + lambda*fr->avcsix[1];
2316 avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
2319 enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
2320 *enercorr += avcsix*enerdiff;
2321 dvdlambda = 0.0;
2322 if (ir->efep != efepNO)
2324 dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
2326 if (bCorrAll)
2328 enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
2329 *enercorr += avctwelve*enerdiff;
2330 if (fr->efep != efepNO)
2332 dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
2336 if (bCorrPres)
2338 svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
2339 if (ir->eDispCorr == edispcAllEnerPres)
2341 svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
2343 /* The factor 2 is because of the Gromacs virial definition */
2344 spres = -2.0*invvol*svir*PRESFAC;
2346 for (m = 0; m < DIM; m++)
2348 virial[m][m] += svir;
2349 pres[m][m] += spres;
2351 *prescorr += spres;
2354 /* We can't currently control when this prints; for now, just print when debugging */
2355 if (debug)
2357 if (bCorrAll)
2359 fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
2360 avcsix, avctwelve);
2362 if (bCorrPres)
2364 fprintf(debug,
2365 "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
2366 *enercorr, spres, svir);
2368 else
2370 fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
2374 if (fr->efep != efepNO)
2376 *dvdlcorr += dvdlambda;
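/* Editorial illustration, not part of the original source: for a plain
 * cut-off (no potential shift, no free-energy coupling, no TPI) the energy
 * correction assembled above reduces to the familiar tail formula. A minimal
 * sketch with hypothetical names, assuming <C6> is given in GROMACS units:
 */
static double exampleLjTailEnergyCorrection(double numAtoms, double volume,
                                            double avgC6, double rvdw)
{
    /* ninter*dens*enerdiffsix with ninter = N/2, dens = N/V and
     * enerdiffsix = -4*pi/(3*rvdw^3) gives -(2*pi/3) * N^2/V * <C6> / rvdw^3. */
    double dens = numAtoms/volume;
    return -2.0*M_PI/3.0*numAtoms*dens*avgC6/(rvdw*rvdw*rvdw);
}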
2381 static void low_do_pbc_mtop(FILE *fplog, int ePBC, const matrix box,
2382 const gmx_mtop_t *mtop, rvec x[],
2383 gmx_bool bFirst)
2385 t_graph *graph;
2386 int mb, as, mol;
2387 gmx_molblock_t *molb;
2389 if (bFirst && fplog)
2391 fprintf(fplog, "Removing pbc first time\n");
2394 snew(graph, 1);
2395 as = 0;
2396 for (mb = 0; mb < mtop->nmolblock; mb++)
2398 molb = &mtop->molblock[mb];
2399 if (molb->natoms_mol == 1 ||
2400 (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
2402 /* Just one atom or charge group in the molecule, no PBC required */
2403 as += molb->nmol*molb->natoms_mol;
2405 else
2407 /* Pass nullptr instead of fplog to avoid graph prints for each molecule type */
2408 mk_graph_ilist(nullptr, mtop->moltype[molb->type].ilist,
2409 0, molb->natoms_mol, FALSE, FALSE, graph);
2411 for (mol = 0; mol < molb->nmol; mol++)
2413 mk_mshift(fplog, graph, ePBC, box, x+as);
2415 shift_self(graph, box, x+as);
2416 /* The molecule is whole now.
2417 * We don't need the second mk_mshift call as in do_pbc_first,
2418 * since we no longer need this graph.
2421 as += molb->natoms_mol;
2423 done_graph(graph);
2426 sfree(graph);
2429 void do_pbc_first_mtop(FILE *fplog, int ePBC, const matrix box,
2430 const gmx_mtop_t *mtop, rvec x[])
2432 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
2435 void do_pbc_mtop(FILE *fplog, int ePBC, const matrix box,
2436 const gmx_mtop_t *mtop, rvec x[])
2438 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
2441 void put_atoms_in_box_omp(int ePBC, const matrix box, int natoms, rvec x[])
2443 int t, nth;
2444 nth = gmx_omp_nthreads_get(emntDefault);
2446 #pragma omp parallel for num_threads(nth) schedule(static)
2447 for (t = 0; t < nth; t++)
2451 int offset, len;
2453 offset = (natoms*t )/nth;
2454 len = (natoms*(t + 1))/nth - offset;
2455 put_atoms_in_box(ePBC, box, len, x + offset);
2457 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
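/* Editorial note, not part of the original source: the static partitioning
 * above gives each of the nth threads one contiguous chunk whose lengths
 * differ by at most one atom. For example, natoms = 10 and nth = 4 yields
 * (offset, len) pairs (0,2), (2,3), (5,2) and (7,3), covering every atom
 * exactly once. */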
2461 // TODO This can be cleaned up a lot, and move back to runner.cpp
2462 void finish_run(FILE *fplog, const gmx::MDLogger &mdlog, t_commrec *cr,
2463 const t_inputrec *inputrec,
2464 t_nrnb nrnb[], gmx_wallcycle_t wcycle,
2465 gmx_walltime_accounting_t walltime_accounting,
2466 nonbonded_verlet_t *nbv,
2467 gmx_bool bWriteStat)
2469 t_nrnb *nrnb_tot = nullptr;
2470 double delta_t = 0;
2471 double nbfs = 0, mflop = 0;
2472 double elapsed_time,
2473 elapsed_time_over_all_ranks,
2474 elapsed_time_over_all_threads,
2475 elapsed_time_over_all_threads_over_all_ranks;
2476 /* Control whether it is valid to print a report. Only the
2477 simulation master may print, but it should not do so if the run
2478 terminated e.g. before a scheduled reset step. This is
2479 complicated by the fact that PME ranks are unaware of the
2480 reason why they were sent a pmerecvqxFINISH. To avoid
2481 communication deadlocks, we always do the communication for the
2482 report, even if we've decided not to write the report, because
2483 how long it takes to finish the run is not important when we've
2484 decided not to report on the simulation performance. */
2485 bool printReport = SIMMASTER(cr);
2487 if (!walltime_accounting_get_valid_finish(walltime_accounting))
2489 GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
2490 printReport = false;
2493 if (cr->nnodes > 1)
2495 snew(nrnb_tot, 1);
2496 #if GMX_MPI
2497 MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
2498 cr->mpi_comm_mysim);
2499 #endif
2501 else
2503 nrnb_tot = nrnb;
2506 elapsed_time = walltime_accounting_get_elapsed_time(walltime_accounting);
2507 elapsed_time_over_all_ranks = elapsed_time;
2508 elapsed_time_over_all_threads = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
2509 elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
2510 #if GMX_MPI
2511 if (cr->nnodes > 1)
2513 /* reduce elapsed_time over all MPI ranks in the current simulation */
2514 MPI_Allreduce(&elapsed_time,
2515 &elapsed_time_over_all_ranks,
2516 1, MPI_DOUBLE, MPI_SUM,
2517 cr->mpi_comm_mysim);
2518 elapsed_time_over_all_ranks /= cr->nnodes;
2519 /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
2520 * current simulation. */
2521 MPI_Allreduce(&elapsed_time_over_all_threads,
2522 &elapsed_time_over_all_threads_over_all_ranks,
2523 1, MPI_DOUBLE, MPI_SUM,
2524 cr->mpi_comm_mysim);
2526 #endif
2528 if (printReport)
2530 print_flop(fplog, nrnb_tot, &nbfs, &mflop);
2532 if (cr->nnodes > 1)
2534 sfree(nrnb_tot);
2537 if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
2539 print_dd_statistics(cr, inputrec, fplog);
2542 /* TODO Move the responsibility for any scaling by thread counts
2543 * to the code that handled the thread region, so that there's a
2544 * mechanism to keep cycle counting working during the transition
2545 * to task parallelism. */
2546 int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
2547 int nthreads_pme = gmx_omp_nthreads_get(emntPME);
2548 wallcycle_scale_by_num_threads(wcycle, cr->duty == DUTY_PME, nthreads_pp, nthreads_pme);
2549 auto cycle_sum(wallcycle_sum(cr, wcycle));
2551 if (printReport)
2553 struct gmx_wallclock_gpu_t* gputimes = use_GPU(nbv) ? nbnxn_gpu_get_timings(nbv->gpu_nbv) : nullptr;
2555 wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
2556 elapsed_time_over_all_ranks,
2557 wcycle, cycle_sum, gputimes);
2559 if (EI_DYNAMICS(inputrec->eI))
2561 delta_t = inputrec->delta_t;
2564 if (fplog)
2566 print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
2567 elapsed_time_over_all_ranks,
2568 walltime_accounting_get_nsteps_done(walltime_accounting),
2569 delta_t, nbfs, mflop);
2571 if (bWriteStat)
2573 print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
2574 elapsed_time_over_all_ranks,
2575 walltime_accounting_get_nsteps_done(walltime_accounting),
2576 delta_t, nbfs, mflop);
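/* Editorial note, not part of the original source: as reduced above,
 * elapsed_time_over_all_ranks is the per-rank wall time averaged over ranks
 * (the MPI_Allreduce sum divided by cr->nnodes), while the "over all threads"
 * quantities are plain sums, so print_perf() is given both an aggregate
 * core-time figure and an averaged wall-clock figure. */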
2581 extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, gmx::ArrayRef<real> lambda, double *lam0)
2583 /* this function works, but could probably use a logic rewrite to keep all the different
2584 types of efep straight. */
2586 if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
2588 return;
2591 t_lambda *fep = ir->fepvals;
2592 *fep_state = fep->init_fep_state; /* this might overwrite the value read from
2593 a checkpoint if one is set -- a kludge is in
2594 place for now to prevent this. */
2596 for (int i = 0; i < efptNR; i++)
2598 /* overwrite lambda state with init_lambda for now for backwards compatibility */
2599 if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
2601 lambda[i] = fep->init_lambda;
2602 if (lam0)
2604 lam0[i] = lambda[i];
2607 else
2609 lambda[i] = fep->all_lambda[i][*fep_state];
2610 if (lam0)
2612 lam0[i] = lambda[i];
2616 if (ir->bSimTemp)
2618 /* need to rescale control temperatures to match current state */
2619 for (int i = 0; i < ir->opts.ngtc; i++)
2621 if (ir->opts.ref_t[i] > 0)
2623 ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
2628 /* Send to the log the information on the current lambdas */
2629 if (fplog != nullptr)
2631 fprintf(fplog, "Initial vector of lambda components:[ ");
2632 for (const auto &l : lambda)
2634 fprintf(fplog, "%10.4f ", l);
2636 fprintf(fplog, "]\n");
2638 return;
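/* Editorial note, not part of the original source: as the logic above reads,
 * a non-negative init_lambda overrides the lambda vector and sets every
 * component to that single value; otherwise component i is taken from column
 * *fep_state of all_lambda, e.g. (hypothetical input) with init-lambda unset
 * and init-fep-state = 2, lambda[i] becomes fep->all_lambda[i][2]. */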
2642 void init_md(FILE *fplog,
2643 t_commrec *cr, gmx::IMDOutputProvider *outputProvider,
2644 t_inputrec *ir, const gmx_output_env_t *oenv,
2645 const MdrunOptions &mdrunOptions,
2646 double *t, double *t0,
2647 t_state *globalState, double *lam0,
2648 t_nrnb *nrnb, gmx_mtop_t *mtop,
2649 gmx_update_t **upd,
2650 int nfile, const t_filenm fnm[],
2651 gmx_mdoutf_t *outf, t_mdebin **mdebin,
2652 tensor force_vir, tensor shake_vir, rvec mu_tot,
2653 gmx_bool *bSimAnn, t_vcm **vcm,
2654 gmx_wallcycle_t wcycle)
2656 int i;
2658 /* Initial values */
2659 *t = *t0 = ir->init_t;
2661 *bSimAnn = FALSE;
2662 for (i = 0; i < ir->opts.ngtc; i++)
2664 /* set bSimAnn if any group is being annealed */
2665 if (ir->opts.annealing[i] != eannNO)
2667 *bSimAnn = TRUE;
2671 /* Initialize lambda variables */
2672 /* TODO: Clean up initialization of fep_state and lambda in t_state.
2673 * We currently need to call initialize_lambdas on non-master ranks
2674 * to initialize lam0.
2676 if (MASTER(cr))
2678 initialize_lambdas(fplog, ir, &globalState->fep_state, globalState->lambda, lam0);
2680 else
2682 int tmpFepState;
2683 std::array<real, efptNR> tmpLambda;
2684 initialize_lambdas(fplog, ir, &tmpFepState, tmpLambda, lam0);
2687 // TODO upd is never NULL in practice, but the analysers don't know that
2688 if (upd)
2690 *upd = init_update(ir);
2692 if (*bSimAnn)
2694 update_annealing_target_temp(ir, ir->init_t, upd ? *upd : nullptr);
2697 if (vcm != nullptr)
2699 *vcm = init_vcm(fplog, &mtop->groups, ir);
2702 if (EI_DYNAMICS(ir->eI) && !mdrunOptions.continuationOptions.appendFiles)
2704 if (ir->etc == etcBERENDSEN)
2706 please_cite(fplog, "Berendsen84a");
2708 if (ir->etc == etcVRESCALE)
2710 please_cite(fplog, "Bussi2007a");
2712 if (ir->eI == eiSD1)
2714 please_cite(fplog, "Goga2012");
2717 init_nrnb(nrnb);
2719 if (nfile != -1)
2721 *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, mtop, oenv, wcycle);
2723 *mdebin = init_mdebin(mdrunOptions.continuationOptions.appendFiles ? nullptr : mdoutf_get_fp_ene(*outf),
2724 mtop, ir, mdoutf_get_fp_dhdl(*outf));
2727 /* Initialize variables */
2728 clear_mat(force_vir);
2729 clear_mat(shake_vir);
2730 clear_rvec(mu_tot);