Fix mdrun hanging upon exit with separate PME ranks
[gromacs.git] / src / gromacs / mdlib / sim_util.cpp
blob 4973896b2c3efd2d5eada84210a6b2b444b12a5a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "sim_util.h"
41 #include "config.h"
43 #include <assert.h>
44 #include <math.h>
45 #include <stdio.h>
46 #include <string.h>
48 #include <array>
50 #include "gromacs/domdec/domdec.h"
51 #include "gromacs/domdec/domdec_struct.h"
52 #include "gromacs/essentialdynamics/edsam.h"
53 #include "gromacs/ewald/pme.h"
54 #include "gromacs/gmxlib/chargegroup.h"
55 #include "gromacs/gmxlib/md_logging.h"
56 #include "gromacs/gmxlib/network.h"
57 #include "gromacs/gmxlib/nrnb.h"
58 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
59 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
60 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
61 #include "gromacs/imd/imd.h"
62 #include "gromacs/listed-forces/bonded.h"
63 #include "gromacs/listed-forces/disre.h"
64 #include "gromacs/listed-forces/orires.h"
65 #include "gromacs/math/functions.h"
66 #include "gromacs/math/units.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vecdump.h"
69 #include "gromacs/mdlib/calcmu.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/force.h"
72 #include "gromacs/mdlib/forcerec.h"
73 #include "gromacs/mdlib/genborn.h"
74 #include "gromacs/mdlib/gmx_omp_nthreads.h"
75 #include "gromacs/mdlib/mdrun.h"
76 #include "gromacs/mdlib/nb_verlet.h"
77 #include "gromacs/mdlib/nbnxn_atomdata.h"
78 #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
79 #include "gromacs/mdlib/nbnxn_grid.h"
80 #include "gromacs/mdlib/nbnxn_search.h"
81 #include "gromacs/mdlib/qmmm.h"
82 #include "gromacs/mdlib/update.h"
83 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
84 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
85 #include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
86 #include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
87 #include "gromacs/mdtypes/commrec.h"
88 #include "gromacs/mdtypes/inputrec.h"
89 #include "gromacs/mdtypes/md_enums.h"
90 #include "gromacs/pbcutil/ishift.h"
91 #include "gromacs/pbcutil/mshift.h"
92 #include "gromacs/pbcutil/pbc.h"
93 #include "gromacs/pulling/pull.h"
94 #include "gromacs/pulling/pull_rotation.h"
95 #include "gromacs/timing/cyclecounter.h"
96 #include "gromacs/timing/gpu_timing.h"
97 #include "gromacs/timing/wallcycle.h"
98 #include "gromacs/timing/wallcyclereporting.h"
99 #include "gromacs/timing/walltime_accounting.h"
100 #include "gromacs/utility/basedefinitions.h"
101 #include "gromacs/utility/cstringutil.h"
102 #include "gromacs/utility/exceptions.h"
103 #include "gromacs/utility/fatalerror.h"
104 #include "gromacs/utility/gmxassert.h"
105 #include "gromacs/utility/gmxmpi.h"
106 #include "gromacs/utility/pleasecite.h"
107 #include "gromacs/utility/smalloc.h"
108 #include "gromacs/utility/sysinfo.h"
110 #include "nbnxn_gpu.h"
112 void print_time(FILE *out,
113 gmx_walltime_accounting_t walltime_accounting,
114 gmx_int64_t step,
115 t_inputrec *ir,
116 t_commrec gmx_unused *cr)
118 time_t finish;
119 char timebuf[STRLEN];
120 double dt, elapsed_seconds, time_per_step;
121 char buf[48];
123 #if !GMX_THREAD_MPI
124 if (!PAR(cr))
125 #endif
127 fprintf(out, "\r");
129 fprintf(out, "step %s", gmx_step_str(step, buf));
130 fflush(out);
132 if ((step >= ir->nstlist))
134 double seconds_since_epoch = gmx_gettime();
135 elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
136 time_per_step = elapsed_seconds/(step - ir->init_step + 1);
137 dt = (ir->nsteps + ir->init_step - step) * time_per_step;
139 if (ir->nsteps >= 0)
141 if (dt >= 300)
143 finish = (time_t) (seconds_since_epoch + dt);
144 gmx_ctime_r(&finish, timebuf, STRLEN);
145 sprintf(buf, "%s", timebuf);
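/* gmx_ctime_r() returns a string that ends in a newline; it is stripped
 * below so the predicted finish time stays on the single status line. */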
146 buf[strlen(buf)-1] = '\0';
147 fprintf(out, ", will finish %s", buf);
149 else
151 fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
154 else
156 fprintf(out, " performance: %.1f ns/day ",
157 ir->delta_t/1000*24*60*60/time_per_step);
160 #if !GMX_THREAD_MPI
161 if (PAR(cr))
163 fprintf(out, "\n");
165 #endif
167 fflush(out);
170 void print_date_and_time(FILE *fplog, int nodeid, const char *title,
171 double the_time)
173 char time_string[STRLEN];
175 if (!fplog)
177 return;
181 int i;
182 char timebuf[STRLEN];
183 time_t temp_time = (time_t) the_time;
185 gmx_ctime_r(&temp_time, timebuf, STRLEN);
186 for (i = 0; timebuf[i] >= ' '; i++)
188 time_string[i] = timebuf[i];
190 time_string[i] = '\0';
193 fprintf(fplog, "%s on rank %d %s\n", title, nodeid, time_string);
196 void print_start(FILE *fplog, t_commrec *cr,
197 gmx_walltime_accounting_t walltime_accounting,
198 const char *name)
200 char buf[STRLEN];
202 sprintf(buf, "Started %s", name);
203 print_date_and_time(fplog, cr->nodeid, buf,
204 walltime_accounting_get_start_time_stamp(walltime_accounting));
207 static void sum_forces(int start, int end, rvec f[], rvec flr[])
209 int i;
211 if (gmx_debug_at)
213 pr_rvecs(debug, 0, "fsr", f+start, end-start);
214 pr_rvecs(debug, 0, "flr", flr+start, end-start);
216 // cppcheck-suppress unreadVariable
217 int gmx_unused nt = gmx_omp_nthreads_get(emntDefault);
218 #pragma omp parallel for num_threads(nt) schedule(static)
219 for (i = start; i < end; i++)
221 rvec_inc(f[i], flr[i]);
226 * calc_f_el calculates forces due to an electric field.
228 * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
230 * Et[] contains the parameters for the time-dependent
231 * part of the field.
232 * Ex[] contains the parameters for
233 * the spatially dependent part of the field.
234 * The function should return the energy due to the electric field
235 * (if any) but for now returns 0.
237 * WARNING:
238 * There can be problems with the virial.
239 * Since the field is not self-consistent this is unavoidable.
240 * For neutral molecules the virial is correct within this approximation.
241 * For neutral systems with many charged molecules the error is small.
242 * But for systems with a net charge or a few charged molecules
243 * the error can be significant when the field is high.
244 * Solution: implement a self-consistent electric field into PME.
246 static void calc_f_el(FILE *fp, int start, int homenr,
247 real charge[], rvec f[],
248 t_cosines Ex[], t_cosines Et[], double t)
250 rvec Ext;
251 real t0;
252 int i, m;
254 for (m = 0; (m < DIM); m++)
256 if (Et[m].n > 0)
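/* Time-dependent part: with three parameters, Et[m].a = {omega, t0, sigma} gives a
 * Gaussian-enveloped cosine pulse cos(omega*(t-t0))*exp(-(t-t0)^2/(2*sigma^2));
 * otherwise a plain cos(omega*t) oscillation is applied. */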
258 if (Et[m].n == 3)
260 t0 = Et[m].a[1];
261 Ext[m] = std::cos(Et[m].a[0]*(t-t0))*std::exp(-gmx::square(t-t0)/(2.0*gmx::square(Et[m].a[2])));
263 else
265 Ext[m] = std::cos(Et[m].a[0]*t);
268 else
270 Ext[m] = 1.0;
272 if (Ex[m].n > 0)
274 /* Convert the field strength from V/nm to MD-units */
275 Ext[m] *= Ex[m].a[0]*FIELDFAC;
276 for (i = start; (i < start+homenr); i++)
278 f[i][m] += charge[i]*Ext[m];
281 else
283 Ext[m] = 0;
286 if (fp != NULL)
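/* Report the applied field components, converted back from MD units to V/nm. */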
288 fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t,
289 Ext[XX]/FIELDFAC, Ext[YY]/FIELDFAC, Ext[ZZ]/FIELDFAC);
293 static void calc_virial(int start, int homenr, rvec x[], rvec f[],
294 tensor vir_part, t_graph *graph, matrix box,
295 t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
297 int i;
299 /* The short-range virial from surrounding boxes */
300 clear_mat(vir_part);
301 calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
302 inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
304 /* Calculate partial virial, for local atoms only, based on short range.
305 * Total virial is computed in global_stat, called from do_md
307 f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
308 inc_nrnb(nrnb, eNR_VIRIAL, homenr);
310 /* Add position restraint contribution */
311 for (i = 0; i < DIM; i++)
313 vir_part[i][i] += fr->vir_diag_posres[i];
316 /* Add wall contribution */
317 for (i = 0; i < DIM; i++)
319 vir_part[i][ZZ] += fr->vir_wall_z[i];
322 if (debug)
324 pr_rvecs(debug, 0, "vir_part", vir_part, DIM);
328 static void pull_potential_wrapper(t_commrec *cr,
329 t_inputrec *ir,
330 matrix box, rvec x[],
331 rvec f[],
332 tensor vir_force,
333 t_mdatoms *mdatoms,
334 gmx_enerdata_t *enerd,
335 real *lambda,
336 double t,
337 gmx_wallcycle_t wcycle)
339 t_pbc pbc;
340 real dvdl;
342 /* Calculate the center of mass forces, this requires communication,
343 * which is why pull_potential is called close to other communication.
344 * The virial contribution is calculated directly,
345 * which is why we call pull_potential after calc_virial.
347 wallcycle_start(wcycle, ewcPULLPOT);
348 set_pbc(&pbc, ir->ePBC, box);
349 dvdl = 0;
350 enerd->term[F_COM_PULL] +=
351 pull_potential(ir->pull_work, mdatoms, &pbc,
352 cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
353 enerd->dvdl_lin[efptRESTRAINT] += dvdl;
354 wallcycle_stop(wcycle, ewcPULLPOT);
357 static void pme_receive_force_ener(t_commrec *cr,
358 gmx_wallcycle_t wcycle,
359 gmx_enerdata_t *enerd,
360 t_forcerec *fr)
362 real e_q, e_lj, dvdl_q, dvdl_lj;
363 float cycles_ppdpme, cycles_seppme;
365 cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
366 dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
368 /* In case of node-splitting, the PP nodes receive the long-range
369 * forces, virial and energy from the PME nodes here.
371 wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
372 dvdl_q = 0;
373 dvdl_lj = 0;
374 gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q,
375 fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
376 &cycles_seppme);
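/* cycles_seppme is the cycle count reported back by the separate PME ranks;
 * it is added to the domain-decomposition cycle accounting via dd_cycles_add() below. */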
377 enerd->term[F_COUL_RECIP] += e_q;
378 enerd->term[F_LJ_RECIP] += e_lj;
379 enerd->dvdl_lin[efptCOUL] += dvdl_q;
380 enerd->dvdl_lin[efptVDW] += dvdl_lj;
382 if (wcycle)
384 dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
386 wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
389 static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr,
390 gmx_int64_t step, real pforce, rvec *x, rvec *f)
392 int i;
393 real pf2, fn2;
394 char buf[STEPSTRSIZE];
396 pf2 = gmx::square(pforce);
397 for (i = 0; i < md->homenr; i++)
399 fn2 = norm2(f[i]);
400 /* We also catch NAN, if the compiler does not optimize this away. */
401 if (fn2 >= pf2 || fn2 != fn2)
403 fprintf(fp, "step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
404 gmx_step_str(step, buf),
405 ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], std::sqrt(fn2));
410 static void post_process_forces(t_commrec *cr,
411 gmx_int64_t step,
412 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
413 gmx_localtop_t *top,
414 matrix box, rvec x[],
415 rvec f[],
416 tensor vir_force,
417 t_mdatoms *mdatoms,
418 t_graph *graph,
419 t_forcerec *fr, gmx_vsite_t *vsite,
420 int flags)
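/* fr->f_novirsum holds forces whose virial is computed separately (the PME/Ewald
 * mesh forces); here they are spread over virtual sites if needed, added into f,
 * and the reciprocal-space virial tensors are folded into vir_force. */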
422 if (fr->bF_NoVirSum)
424 if (vsite)
426 /* Spread the mesh force on virtual sites to the other particles...
427 * This is parallelized. MPI communication is performed
428 * if the constructing atoms aren't local.
430 wallcycle_start(wcycle, ewcVSITESPREAD);
431 spread_vsite_f(vsite, x, fr->f_novirsum, NULL,
432 (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip,
433 nrnb,
434 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
435 wallcycle_stop(wcycle, ewcVSITESPREAD);
437 if (flags & GMX_FORCE_VIRIAL)
439 /* Now add the forces, this is local */
440 if (fr->bDomDec)
442 sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum);
444 else
446 sum_forces(0, mdatoms->homenr,
447 f, fr->f_novirsum);
449 if (EEL_FULL(fr->eeltype))
451 /* Add the mesh contribution to the virial */
452 m_add(vir_force, fr->vir_el_recip, vir_force);
454 if (EVDW_PME(fr->vdwtype))
456 /* Add the mesh contribution to the virial */
457 m_add(vir_force, fr->vir_lj_recip, vir_force);
459 if (debug)
461 pr_rvecs(debug, 0, "vir_force", vir_force, DIM);
466 if (fr->print_force >= 0)
468 print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f);
472 static void do_nb_verlet(t_forcerec *fr,
473 interaction_const_t *ic,
474 gmx_enerdata_t *enerd,
475 int flags, int ilocality,
476 int clearF,
477 t_nrnb *nrnb,
478 gmx_wallcycle_t wcycle)
480 int enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
481 nonbonded_verlet_group_t *nbvg;
482 gmx_bool bUsingGpuKernels;
484 if (!(flags & GMX_FORCE_NONBONDED))
486 /* skip non-bonded calculation */
487 return;
490 nbvg = &fr->nbv->grp[ilocality];
492 /* GPU kernel launch overhead is already timed separately */
493 if (fr->cutoff_scheme != ecutsVERLET)
495 gmx_incons("Invalid cut-off scheme passed!");
498 bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
500 if (!bUsingGpuKernels)
502 wallcycle_sub_start(wcycle, ewcsNONBONDED);
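/* Dispatch to the matching nbnxn kernel flavour: plain-C 4x4 reference, SIMD 4xN,
 * SIMD 2xNN, the real GPU kernel, or the plain-C 8x8x8 kernel used for GPU emulation. */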
504 switch (nbvg->kernel_type)
506 case nbnxnk4x4_PlainC:
507 nbnxn_kernel_ref(&nbvg->nbl_lists,
508 nbvg->nbat, ic,
509 fr->shift_vec,
510 flags,
511 clearF,
512 fr->fshift[0],
513 enerd->grpp.ener[egCOULSR],
514 fr->bBHAM ?
515 enerd->grpp.ener[egBHAMSR] :
516 enerd->grpp.ener[egLJSR]);
517 break;
519 case nbnxnk4xN_SIMD_4xN:
520 nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
521 nbvg->nbat, ic,
522 nbvg->ewald_excl,
523 fr->shift_vec,
524 flags,
525 clearF,
526 fr->fshift[0],
527 enerd->grpp.ener[egCOULSR],
528 fr->bBHAM ?
529 enerd->grpp.ener[egBHAMSR] :
530 enerd->grpp.ener[egLJSR]);
531 break;
532 case nbnxnk4xN_SIMD_2xNN:
533 nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
534 nbvg->nbat, ic,
535 nbvg->ewald_excl,
536 fr->shift_vec,
537 flags,
538 clearF,
539 fr->fshift[0],
540 enerd->grpp.ener[egCOULSR],
541 fr->bBHAM ?
542 enerd->grpp.ener[egBHAMSR] :
543 enerd->grpp.ener[egLJSR]);
544 break;
546 case nbnxnk8x8x8_GPU:
547 nbnxn_gpu_launch_kernel(fr->nbv->gpu_nbv, nbvg->nbat, flags, ilocality);
548 break;
550 case nbnxnk8x8x8_PlainC:
551 nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
552 nbvg->nbat, ic,
553 fr->shift_vec,
554 flags,
555 clearF,
556 nbvg->nbat->out[0].f,
557 fr->fshift[0],
558 enerd->grpp.ener[egCOULSR],
559 fr->bBHAM ?
560 enerd->grpp.ener[egBHAMSR] :
561 enerd->grpp.ener[egLJSR]);
562 break;
564 default:
565 gmx_incons("Invalid nonbonded kernel type passed!");
568 if (!bUsingGpuKernels)
570 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
573 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
575 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
577 else if ((!bUsingGpuKernels && nbvg->ewald_excl == ewaldexclAnalytical) ||
578 (bUsingGpuKernels && nbnxn_gpu_is_kernel_ewald_analytical(fr->nbv->gpu_nbv)))
580 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
582 else
584 enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
586 enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
587 if (flags & GMX_FORCE_ENERGY)
589 /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
590 enr_nbnxn_kernel_ljc += 1;
591 enr_nbnxn_kernel_lj += 1;
594 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
595 nbvg->nbl_lists.natpair_ljq);
596 inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
597 nbvg->nbl_lists.natpair_lj);
598 /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
599 inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
600 nbvg->nbl_lists.natpair_q);
602 if (ic->vdw_modifier == eintmodFORCESWITCH)
604 /* We add up the switch cost separately */
605 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
606 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
608 if (ic->vdw_modifier == eintmodPOTSWITCH)
610 /* We add up the switch cost separately */
611 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
612 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
614 if (ic->vdwtype == evdwPME)
616 /* We add up the LJ Ewald cost separately */
617 inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 1 : 0),
618 nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj);
622 static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
623 t_forcerec *fr,
624 rvec x[],
625 rvec f[],
626 t_mdatoms *mdatoms,
627 t_lambda *fepvals,
628 real *lambda,
629 gmx_enerdata_t *enerd,
630 int flags,
631 t_nrnb *nrnb,
632 gmx_wallcycle_t wcycle)
634 int donb_flags;
635 nb_kernel_data_t kernel_data;
636 real lam_i[efptNR];
637 real dvdl_nb[efptNR];
638 int th;
639 int i, j;
641 donb_flags = 0;
642 /* Add short-range interactions */
643 donb_flags |= GMX_NONBONDED_DO_SR;
645 /* Currently all group scheme kernels always calculate (shift-)forces */
646 if (flags & GMX_FORCE_FORCES)
648 donb_flags |= GMX_NONBONDED_DO_FORCE;
650 if (flags & GMX_FORCE_VIRIAL)
652 donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
654 if (flags & GMX_FORCE_ENERGY)
656 donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
659 kernel_data.flags = donb_flags;
660 kernel_data.lambda = lambda;
661 kernel_data.dvdl = dvdl_nb;
663 kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
664 kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
666 /* reset free energy components */
667 for (i = 0; i < efptNR; i++)
669 dvdl_nb[i] = 0;
672 assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl);
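/* The perturbed-pair (free-energy) lists are laid out as one list per OpenMP thread,
 * which the assert above checks, so the kernel loop below runs with nnbl threads. */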
674 wallcycle_sub_start(wcycle, ewcsNONBONDED);
675 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
676 for (th = 0; th < nbl_lists->nnbl; th++)
680 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
681 x, f, fr, mdatoms, &kernel_data, nrnb);
683 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
686 if (fepvals->sc_alpha != 0)
688 enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
689 enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
691 else
693 enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
694 enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
697 /* If we do foreign lambda and we have soft-core interactions
698 * we have to recalculate the (non-linear) energy contributions.
700 if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
702 kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
703 kernel_data.lambda = lam_i;
704 kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
705 kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
706 /* Note that we add to kernel_data.dvdl, but ignore the result */
708 for (i = 0; i < enerd->n_lambda; i++)
710 for (j = 0; j < efptNR; j++)
712 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
714 reset_foreign_enerdata(enerd);
715 #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
716 for (th = 0; th < nbl_lists->nnbl; th++)
720 gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
721 x, f, fr, mdatoms, &kernel_data, nrnb);
723 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
726 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
727 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
731 wallcycle_sub_stop(wcycle, ewcsNONBONDED);
734 gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
736 return nbv != NULL && nbv->bUseGPU;
739 static gmx_inline void clear_rvecs_omp(int n, rvec v[])
741 int nth = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, n);
743 /* Note that we would like to avoid this conditional by putting it
744 * into the omp pragma instead, but then we still take the full
745 * omp parallel for overhead (at least with gcc5).
747 if (nth == 1)
749 for (int i = 0; i < n; i++)
751 clear_rvec(v[i]);
754 else
756 #pragma omp parallel for num_threads(nth) schedule(static)
757 for (int i = 0; i < n; i++)
759 clear_rvec(v[i]);
764 /*! \brief This routine checks if the potential energy is finite.
766 * Note that passing this check does not guarantee finite forces,
767 * since those use slightly different arithmetics. But in most cases
768 * there is just a narrow coordinate range where forces are not finite
769 * and energies are finite.
771 * \param[in] enerd The energy data; the non-bonded group energies need to be added in here before calling this routine
773 static void checkPotentialEnergyValidity(const gmx_enerdata_t *enerd)
775 if (!std::isfinite(enerd->term[F_EPOT]))
777 gmx_fatal(FARGS, "The total potential energy is %g, which is not finite. The LJ and electrostatic contributions to the energy are %g and %g, respectively. A non-finite potential energy can be caused by overlapping interactions in bonded interactions or very large or NaN coordinate values. Usually this is caused by a badly or non-equilibrated initial configuration or incorrect interactions or parameters in the topology.",
778 enerd->term[F_EPOT],
779 enerd->term[F_LJ],
780 enerd->term[F_COUL_SR]);
784 void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
785 t_inputrec *inputrec,
786 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
787 gmx_localtop_t *top,
788 gmx_groups_t gmx_unused *groups,
789 matrix box, rvec x[], history_t *hist,
790 rvec f[],
791 tensor vir_force,
792 t_mdatoms *mdatoms,
793 gmx_enerdata_t *enerd, t_fcdata *fcd,
794 real *lambda, t_graph *graph,
795 t_forcerec *fr, interaction_const_t *ic,
796 gmx_vsite_t *vsite, rvec mu_tot,
797 double t, FILE *field, gmx_edsam_t ed,
798 gmx_bool bBornRadii,
799 int flags)
801 int cg1, i, j;
802 int start, homenr;
803 double mu[2*DIM];
804 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
805 gmx_bool bDoForces, bUseGPU, bUseOrEmulGPU;
806 gmx_bool bDiffKernels = FALSE;
807 rvec vzero, box_diag;
808 float cycles_pme, cycles_force, cycles_wait_gpu;
809 /* TODO To avoid loss of precision, float can't be used for a
810 * cycle count. Build an object that can do this right and perhaps
811 * also be used by gmx_wallcycle_t */
812 gmx_cycles_t cycleCountBeforeLocalWorkCompletes = 0;
813 nonbonded_verlet_t *nbv;
815 cycles_force = 0;
816 cycles_wait_gpu = 0;
817 nbv = fr->nbv;
819 start = 0;
820 homenr = mdatoms->homenr;
822 clear_mat(vir_force);
824 if (DOMAINDECOMP(cr))
826 cg1 = cr->dd->ncg_tot;
828 else
830 cg1 = top->cgs.nr;
832 if (fr->n_tpi > 0)
834 cg1--;
837 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
838 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
839 bFillGrid = (bNS && bStateChanged);
840 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
841 bDoForces = (flags & GMX_FORCE_FORCES);
842 bUseGPU = fr->nbv->bUseGPU;
843 bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC);
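/* bUseOrEmulGPU is true both with a real GPU and with the plain-C 8x8x8 "GPU
 * emulation" kernel, which follows the same scheduling path (non-bonded work
 * deferred, results reduced later) on the CPU. */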
845 if (bStateChanged)
847 update_forcerec(fr, box);
849 if (inputrecNeedMutot(inputrec))
851 /* Calculate total (local) dipole moment in a temporary common array.
852 * This makes it possible to sum them over nodes faster.
854 calc_mu(start, homenr,
855 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
856 mu, mu+DIM);
860 if (fr->ePBC != epbcNONE)
862 /* Compute shift vectors every step,
863 * because of pressure coupling or box deformation!
865 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
867 calc_shifts(box, fr->shift_vec);
870 if (bCalcCGCM)
872 put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
873 inc_nrnb(nrnb, eNR_SHIFTX, homenr);
875 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
877 unshift_self(graph, box, x);
881 nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
882 fr->shift_vec, nbv->grp[0].nbat);
884 #if GMX_MPI
885 if (!(cr->duty & DUTY_PME))
887 gmx_bool bBS;
888 matrix boxs;
890 /* Send particle coordinates to the pme nodes.
891 * Since this is only implemented for domain decomposition
892 * and domain decomposition does not use the graph,
893 * we do not need to worry about shifting.
896 int pme_flags = 0;
898 wallcycle_start(wcycle, ewcPP_PMESENDX);
900 bBS = (inputrec->nwall == 2);
901 if (bBS)
903 copy_mat(box, boxs);
904 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
907 if (EEL_PME(fr->eeltype))
909 pme_flags |= GMX_PME_DO_COULOMB;
912 if (EVDW_PME(fr->vdwtype))
914 pme_flags |= GMX_PME_DO_LJ;
917 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
918 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
919 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
920 pme_flags, step);
922 wallcycle_stop(wcycle, ewcPP_PMESENDX);
924 #endif /* GMX_MPI */
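/* With separate PME ranks the coordinates are sent off as early as possible, so the
 * reciprocal-space work on those ranks overlaps with the short-range work below; the
 * matching forces and energies are received in pme_receive_force_ener() near the end
 * of the step. */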
926 /* do gridding for pair search */
927 if (bNS)
929 if (graph && bStateChanged)
931 /* Calculate intramolecular shift vectors to make molecules whole */
932 mk_mshift(fplog, graph, fr->ePBC, box, x);
935 clear_rvec(vzero);
936 box_diag[XX] = box[XX][XX];
937 box_diag[YY] = box[YY][YY];
938 box_diag[ZZ] = box[ZZ][ZZ];
940 wallcycle_start(wcycle, ewcNS);
941 if (!fr->bDomDec)
943 wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
944 nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
945 0, vzero, box_diag,
946 0, mdatoms->homenr, -1, fr->cginfo, x,
947 0, NULL,
948 nbv->grp[eintLocal].kernel_type,
949 nbv->grp[eintLocal].nbat);
950 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
952 else
954 wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
955 nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
956 fr->cginfo, x,
957 nbv->grp[eintNonlocal].kernel_type,
958 nbv->grp[eintNonlocal].nbat);
959 wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
962 if (nbv->ngrp == 1 ||
963 nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
965 nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll,
966 nbv->nbs, mdatoms, fr->cginfo);
968 else
970 nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal,
971 nbv->nbs, mdatoms, fr->cginfo);
972 nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll,
973 nbv->nbs, mdatoms, fr->cginfo);
975 wallcycle_stop(wcycle, ewcNS);
978 /* initialize the GPU atom data and copy shift vector */
979 if (bUseGPU)
981 if (bNS)
983 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
984 nbnxn_gpu_init_atomdata(nbv->gpu_nbv, nbv->grp[eintLocal].nbat);
985 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
988 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
989 nbnxn_gpu_upload_shiftvec(nbv->gpu_nbv, nbv->grp[eintLocal].nbat);
990 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
993 /* do local pair search */
994 if (bNS)
996 wallcycle_start_nocount(wcycle, ewcNS);
997 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
998 nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat,
999 &top->excls,
1000 ic->rlist,
1001 nbv->min_ci_balanced,
1002 &nbv->grp[eintLocal].nbl_lists,
1003 eintLocal,
1004 nbv->grp[eintLocal].kernel_type,
1005 nrnb);
1006 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
1008 if (bUseGPU)
1010 /* initialize local pair-list on the GPU */
1011 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
1012 nbv->grp[eintLocal].nbl_lists.nbl[0],
1013 eintLocal);
1015 wallcycle_stop(wcycle, ewcNS);
1017 else
1019 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1020 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1021 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x,
1022 nbv->grp[eintLocal].nbat);
1023 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1024 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1027 if (bUseGPU)
1029 wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
1030 /* launch local nonbonded F on GPU */
1031 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
1032 nrnb, wcycle);
1033 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
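/* The kernel launch above is asynchronous: the CPU continues with the non-local pair
 * search, bonded and PME work below while the local non-bonded kernel runs on the GPU. */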
1036 /* Communicate coordinates and sum dipole if necessary +
1037 do non-local pair search */
1038 if (DOMAINDECOMP(cr))
1040 bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
1041 nbv->grp[eintLocal].kernel_type);
1043 if (bDiffKernels)
1045 /* With GPU+CPU non-bonded calculations we need to copy
1046 * the local coordinates to the non-local nbat struct
1047 * (in CPU format) as the non-local kernel call also
1048 * calculates the local - non-local interactions.
1050 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1051 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1052 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x,
1053 nbv->grp[eintNonlocal].nbat);
1054 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1055 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1058 if (bNS)
1060 wallcycle_start_nocount(wcycle, ewcNS);
1061 wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1063 if (bDiffKernels)
1065 nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat);
1068 nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat,
1069 &top->excls,
1070 ic->rlist,
1071 nbv->min_ci_balanced,
1072 &nbv->grp[eintNonlocal].nbl_lists,
1073 eintNonlocal,
1074 nbv->grp[eintNonlocal].kernel_type,
1075 nrnb);
1077 wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1079 if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_GPU)
1081 /* initialize non-local pair-list on the GPU */
1082 nbnxn_gpu_init_pairlist(nbv->gpu_nbv,
1083 nbv->grp[eintNonlocal].nbl_lists.nbl[0],
1084 eintNonlocal);
1086 wallcycle_stop(wcycle, ewcNS);
1088 else
1090 wallcycle_start(wcycle, ewcMOVEX);
1091 dd_move_x(cr->dd, box, x);
1092 wallcycle_stop(wcycle, ewcMOVEX);
1094 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1095 wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1096 nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x,
1097 nbv->grp[eintNonlocal].nbat);
1098 wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1099 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1102 if (bUseGPU && !bDiffKernels)
1104 wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
1105 /* launch non-local nonbonded F on GPU */
1106 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
1107 nrnb, wcycle);
1108 cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1112 if (bUseGPU)
1114 /* launch D2H copy-back F */
1115 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1116 if (DOMAINDECOMP(cr) && !bDiffKernels)
1118 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->grp[eintNonlocal].nbat,
1119 flags, eatNonlocal);
1121 nbnxn_gpu_launch_cpyback(nbv->gpu_nbv, nbv->grp[eintLocal].nbat,
1122 flags, eatLocal);
1123 cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
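/* The device-to-host force copy is only launched here; the results are waited for
 * in the nbnxn_gpu_wait_for_gpu() calls further down. */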
1126 if (bStateChanged && inputrecNeedMutot(inputrec))
1128 if (PAR(cr))
1130 gmx_sumd(2*DIM, mu, cr);
1133 for (i = 0; i < 2; i++)
1135 for (j = 0; j < DIM; j++)
1137 fr->mu_tot[i][j] = mu[i*DIM + j];
1141 if (fr->efep == efepNO)
1143 copy_rvec(fr->mu_tot[0], mu_tot);
1145 else
1147 for (j = 0; j < DIM; j++)
1149 mu_tot[j] =
1150 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
1151 lambda[efptCOUL]*fr->mu_tot[1][j];
1155 /* Reset energies */
1156 reset_enerdata(enerd);
1157 clear_rvecs(SHIFTS, fr->fshift);
1159 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1161 wallcycle_start(wcycle, ewcPPDURINGPME);
1162 dd_force_flop_start(cr->dd, nrnb);
1165 if (inputrec->bRot)
1167 /* Enforced rotation has its own cycle counter that starts after the collective
1168 * coordinates have been communicated. It is added to ddCyclF to allow
1169 * for proper load-balancing */
1170 wallcycle_start(wcycle, ewcROT);
1171 do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1172 wallcycle_stop(wcycle, ewcROT);
1175 /* Start the force cycle counter.
1176 * This counter is stopped after do_force_lowlevel.
1177 * No parallel communication should occur while this counter is running,
1178 * since that will interfere with the dynamic load balancing.
1180 wallcycle_start(wcycle, ewcFORCE);
1181 if (bDoForces)
1183 /* Reset forces for which the virial is calculated separately:
1184 * PME/Ewald forces if necessary */
1185 if (fr->bF_NoVirSum)
1187 if (flags & GMX_FORCE_VIRIAL)
1189 fr->f_novirsum = fr->f_novirsum_alloc;
1191 else
1193 /* We are not calculating the pressure so we do not need
1194 * a separate array for forces that do not contribute
1195 * to the pressure.
1197 fr->f_novirsum = f;
1201 if (fr->bF_NoVirSum)
1203 if (flags & GMX_FORCE_VIRIAL)
1205 if (fr->bDomDec)
1207 clear_rvecs_omp(fr->f_novirsum_n, fr->f_novirsum);
1209 else
1211 clear_rvecs_omp(homenr, fr->f_novirsum+start);
1215 /* Clear the short- and long-range forces */
1216 clear_rvecs_omp(fr->natoms_force_constr, f);
1218 clear_rvec(fr->vir_diag_posres);
1221 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1223 clear_pull_forces(inputrec->pull_work);
1226 /* We calculate the non-bonded forces, when done on the CPU, here.
1227 * We do this before calling do_force_lowlevel, because in that
1228 * function, the listed forces are calculated before PME, which
1229 * does communication. With this order, non-bonded and listed
1230 * force calculation imbalance can be balanced out by the domain
1231 * decomposition load balancing.
1234 if (!bUseOrEmulGPU)
1236 /* Maybe we should move this into do_force_lowlevel */
1237 do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
1238 nrnb, wcycle);
1241 if (fr->efep != efepNO)
1243 /* Calculate the local and non-local free energy interactions here.
1244 * Happens here on the CPU both with and without GPU.
1246 if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
1248 do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
1249 fr, x, f, mdatoms,
1250 inputrec->fepvals, lambda,
1251 enerd, flags, nrnb, wcycle);
1254 if (DOMAINDECOMP(cr) &&
1255 fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
1257 do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
1258 fr, x, f, mdatoms,
1259 inputrec->fepvals, lambda,
1260 enerd, flags, nrnb, wcycle);
1264 if (!bUseOrEmulGPU || bDiffKernels)
1266 int aloc;
1268 if (DOMAINDECOMP(cr))
1270 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
1271 bDiffKernels ? enbvClearFYes : enbvClearFNo,
1272 nrnb, wcycle);
1275 if (!bUseOrEmulGPU)
1277 aloc = eintLocal;
1279 else
1281 aloc = eintNonlocal;
1284 /* Add all the non-bonded force to the normal force array.
1285 * This can be split into a local and a non-local part when overlapping
1286 * communication with calculation with domain decomposition.
1288 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1289 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1290 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1291 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f);
1292 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1293 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1294 wallcycle_start_nocount(wcycle, ewcFORCE);
1296 /* if there are multiple fshift output buffers reduce them */
1297 if ((flags & GMX_FORCE_VIRIAL) &&
1298 nbv->grp[aloc].nbl_lists.nnbl > 1)
1300 /* This is not in a subcounter because it takes a
1301 negligible and constant-sized amount of time */
1302 nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
1303 fr->fshift);
1307 /* update QMMMrec, if necessary */
1308 if (fr->bQMMM)
1310 update_QMMMrec(cr, fr, x, mdatoms, box, top);
1313 /* Compute the bonded and non-bonded energies and optionally forces */
1314 do_force_lowlevel(fr, inputrec, &(top->idef),
1315 cr, nrnb, wcycle, mdatoms,
1316 x, hist, f, enerd, fcd, top, fr->born,
1317 bBornRadii, box,
1318 inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
1319 flags, &cycles_pme);
1321 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1323 if (ed)
1325 do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1328 if (bUseOrEmulGPU && !bDiffKernels)
1330 /* wait for non-local forces (or calculate in emulation mode) */
1331 if (DOMAINDECOMP(cr))
1333 if (bUseGPU)
1335 float cycles_tmp;
1337 wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
1338 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1339 flags, eatNonlocal,
1340 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1341 fr->fshift);
1342 cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
1343 cycles_wait_gpu += cycles_tmp;
1344 cycles_force += cycles_tmp;
1346 else
1348 wallcycle_start_nocount(wcycle, ewcFORCE);
1349 do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
1350 nrnb, wcycle);
1351 cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1353 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1354 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1355 /* skip the reduction if there was no non-local work to do */
1356 if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
1358 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
1359 nbv->grp[eintNonlocal].nbat, f);
1361 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1362 cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1366 if (bDoForces && DOMAINDECOMP(cr))
1368 if (bUseGPU)
1370 /* We are done with the CPU compute, but the GPU local non-bonded
1371 * kernel can still be running while we communicate the forces.
1372 * We start a counter here, so we can, hopefully, time the rest
1373 * of the GPU kernel execution and data transfer.
1375 cycleCountBeforeLocalWorkCompletes = gmx_cycles_read();
1378 /* Communicate the forces */
1379 wallcycle_start(wcycle, ewcMOVEF);
1380 dd_move_f(cr->dd, f, fr->fshift);
1381 wallcycle_stop(wcycle, ewcMOVEF);
1384 if (bUseOrEmulGPU)
1386 /* wait for local forces (or calculate in emulation mode) */
1387 if (bUseGPU)
1389 float cycles_tmp, cycles_wait_est;
1390 /* Measured overhead on CUDA and OpenCL with(out) GPU sharing
1391 * is between 0.5 and 1.5 Mcycles. So 2 Mcycles is an overestimate,
1392 * but even with a step of 0.1 ms the difference is less than 1%
1393 * of the step time.
1395 const float gpuWaitApiOverheadMargin = 2e6f; /* cycles */
1397 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
1398 nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
1399 flags, eatLocal,
1400 enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1401 fr->fshift);
1402 cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
1404 if (bDoForces && DOMAINDECOMP(cr))
1406 cycles_wait_est = gmx_cycles_read() - cycleCountBeforeLocalWorkCompletes;
1408 if (cycles_tmp < gpuWaitApiOverheadMargin)
1410 /* We measured few cycles, it could be that the kernel
1411 * and transfer finished earlier and there was no actual
1412 * wait time, only API call overhead.
1413 * Then the actual time could be anywhere between 0 and
1414 * cycles_wait_est. As a compromise, we use half the time.
1416 cycles_wait_est *= 0.5f;
1419 else
1421 /* No force communication so we actually timed the wait */
1422 cycles_wait_est = cycles_tmp;
1424 /* Even though this is after dd_move_f, the actual task we are
1425 * waiting for runs asynchronously with dd_move_f and we usually
1426 * have nothing to balance it with, so we can and should add
1427 * the time to the force time for load balancing.
1429 cycles_force += cycles_wait_est;
1430 cycles_wait_gpu += cycles_wait_est;
1432 /* now clear the GPU outputs while we finish the step on the CPU */
1433 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1434 nbnxn_gpu_clear_outputs(nbv->gpu_nbv, flags);
1435 wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1437 else
1439 wallcycle_start_nocount(wcycle, ewcFORCE);
1440 do_nb_verlet(fr, ic, enerd, flags, eintLocal,
1441 DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
1442 nrnb, wcycle);
1443 wallcycle_stop(wcycle, ewcFORCE);
1445 wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1446 wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1447 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
1448 nbv->grp[eintLocal].nbat, f);
1449 wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1450 wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1453 if (DOMAINDECOMP(cr))
1455 dd_force_flop_stop(cr->dd, nrnb);
1456 if (wcycle)
1458 dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1459 if (bUseGPU)
1461 dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU);
1466 if (bDoForces)
1468 if (inputrecElecField(inputrec))
1470 /* Compute forces due to electric field */
1471 calc_f_el(MASTER(cr) ? field : NULL,
1472 start, homenr, mdatoms->chargeA, fr->f_novirsum,
1473 inputrec->ex, inputrec->et, t);
1476 /* If we have NoVirSum forces, but we do not calculate the virial,
1477 * we sum fr->f_novirsum=f later.
1479 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1481 wallcycle_start(wcycle, ewcVSITESPREAD);
1482 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
1483 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1484 wallcycle_stop(wcycle, ewcVSITESPREAD);
1487 if (flags & GMX_FORCE_VIRIAL)
1489 /* Calculation of the virial must be done after vsites! */
1490 calc_virial(0, mdatoms->homenr, x, f,
1491 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1495 if (inputrec->bPull && pull_have_potential(inputrec->pull_work))
1497 /* Since the COM pulling is always done mass-weighted, no forces are
1498 * applied to vsites and this call can be done after vsite spreading.
1500 pull_potential_wrapper(cr, inputrec, box, x,
1501 f, vir_force, mdatoms, enerd, lambda, t,
1502 wcycle);
1505 /* Add the forces from enforced rotation potentials (if any) */
1506 if (inputrec->bRot)
1508 wallcycle_start(wcycle, ewcROTadd);
1509 enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
1510 wallcycle_stop(wcycle, ewcROTadd);
1513 /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
1514 IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
1516 if (PAR(cr) && !(cr->duty & DUTY_PME))
1518 /* In case of node-splitting, the PP nodes receive the long-range
1519 * forces, virial and energy from the PME nodes here.
1521 pme_receive_force_ener(cr, wcycle, enerd, fr);
1524 if (bDoForces)
1526 post_process_forces(cr, step, nrnb, wcycle,
1527 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1528 flags);
1531 if (flags & GMX_FORCE_ENERGY)
1533 /* Sum the potential energy terms from group contributions */
1534 sum_epot(&(enerd->grpp), enerd->term);
1536 if (!EI_TPI(inputrec->eI))
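/* The check is skipped for test-particle insertion, where an overlapping trial
 * insertion can legitimately give a non-finite potential energy. */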
1538 checkPotentialEnergyValidity(enerd);
1543 void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
1544 t_inputrec *inputrec,
1545 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1546 gmx_localtop_t *top,
1547 gmx_groups_t *groups,
1548 matrix box, rvec x[], history_t *hist,
1549 rvec f[],
1550 tensor vir_force,
1551 t_mdatoms *mdatoms,
1552 gmx_enerdata_t *enerd, t_fcdata *fcd,
1553 real *lambda, t_graph *graph,
1554 t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
1555 double t, FILE *field, gmx_edsam_t ed,
1556 gmx_bool bBornRadii,
1557 int flags)
1559 int cg0, cg1, i, j;
1560 int start, homenr;
1561 double mu[2*DIM];
1562 gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
1563 gmx_bool bDoForces;
1564 float cycles_pme, cycles_force;
1566 start = 0;
1567 homenr = mdatoms->homenr;
1569 clear_mat(vir_force);
1571 cg0 = 0;
1572 if (DOMAINDECOMP(cr))
1574 cg1 = cr->dd->ncg_tot;
1576 else
1578 cg1 = top->cgs.nr;
1580 if (fr->n_tpi > 0)
1582 cg1--;
1585 bStateChanged = (flags & GMX_FORCE_STATECHANGED);
1586 bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
1587 /* Should we perform the long-range nonbonded evaluation inside the neighbor searching? */
1588 bFillGrid = (bNS && bStateChanged);
1589 bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
1590 bDoForces = (flags & GMX_FORCE_FORCES);
1592 if (bStateChanged)
1594 update_forcerec(fr, box);
1596 if (inputrecNeedMutot(inputrec))
1598 /* Calculate total (local) dipole moment in a temporary common array.
1599 * This makes it possible to sum them over nodes faster.
1601 calc_mu(start, homenr,
1602 x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
1603 mu, mu+DIM);
1607 if (fr->ePBC != epbcNONE)
1609 /* Compute shift vectors every step,
1610 * because of pressure coupling or box deformation!
1612 if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
1614 calc_shifts(box, fr->shift_vec);
1617 if (bCalcCGCM)
1619 put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
1620 &(top->cgs), x, fr->cg_cm);
1621 inc_nrnb(nrnb, eNR_CGCM, homenr);
1622 inc_nrnb(nrnb, eNR_RESETX, cg1-cg0);
1624 else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
1626 unshift_self(graph, box, x);
1629 else if (bCalcCGCM)
1631 calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
1632 inc_nrnb(nrnb, eNR_CGCM, homenr);
1635 if (bCalcCGCM && gmx_debug_at)
1637 pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
1640 #if GMX_MPI
1641 if (!(cr->duty & DUTY_PME))
1643 gmx_bool bBS;
1644 matrix boxs;
1646 /* Send particle coordinates to the pme nodes.
1647 * Since this is only implemented for domain decomposition
1648 * and domain decomposition does not use the graph,
1649 * we do not need to worry about shifting.
1652 int pme_flags = 0;
1654 wallcycle_start(wcycle, ewcPP_PMESENDX);
1656 bBS = (inputrec->nwall == 2);
1657 if (bBS)
1659 copy_mat(box, boxs);
1660 svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
1663 if (EEL_PME(fr->eeltype))
1665 pme_flags |= GMX_PME_DO_COULOMB;
1668 if (EVDW_PME(fr->vdwtype))
1670 pme_flags |= GMX_PME_DO_LJ;
1673 gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
1674 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
1675 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
1676 pme_flags, step);
1678 wallcycle_stop(wcycle, ewcPP_PMESENDX);
1680 #endif /* GMX_MPI */
1682 /* Communicate coordinates and sum dipole if necessary */
1683 if (DOMAINDECOMP(cr))
1685 wallcycle_start(wcycle, ewcMOVEX);
1686 dd_move_x(cr->dd, box, x);
1687 wallcycle_stop(wcycle, ewcMOVEX);
1690 if (inputrecNeedMutot(inputrec))
1692 if (bStateChanged)
1694 if (PAR(cr))
1696 gmx_sumd(2*DIM, mu, cr);
1698 for (i = 0; i < 2; i++)
1700 for (j = 0; j < DIM; j++)
1702 fr->mu_tot[i][j] = mu[i*DIM + j];
1706 if (fr->efep == efepNO)
1708 copy_rvec(fr->mu_tot[0], mu_tot);
1710 else
1712 for (j = 0; j < DIM; j++)
1714 mu_tot[j] =
1715 (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
1720 /* Reset energies */
1721 reset_enerdata(enerd);
1722 clear_rvecs(SHIFTS, fr->fshift);
1724 if (bNS)
1726 wallcycle_start(wcycle, ewcNS);
1728 if (graph && bStateChanged)
1730 /* Calculate intramolecular shift vectors to make molecules whole */
1731 mk_mshift(fplog, graph, fr->ePBC, box, x);
1734 /* Do the actual neighbour searching */
1735 ns(fplog, fr, box,
1736 groups, top, mdatoms,
1737 cr, nrnb, bFillGrid);
1739 wallcycle_stop(wcycle, ewcNS);
1742 if (inputrec->implicit_solvent && bNS)
1744 make_gb_nblist(cr, inputrec->gb_algorithm,
1745 x, box, fr, &top->idef, graph, fr->born);
1748 if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
1750 wallcycle_start(wcycle, ewcPPDURINGPME);
1751 dd_force_flop_start(cr->dd, nrnb);
1754 if (inputrec->bRot)
1756 /* Enforced rotation has its own cycle counter that starts after the collective
1757 * coordinates have been communicated. It is added to ddCyclF to allow
1758 * for proper load-balancing */
1759 wallcycle_start(wcycle, ewcROT);
1760 do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1761 wallcycle_stop(wcycle, ewcROT);
1764 /* Start the force cycle counter.
1765 * This counter is stopped after do_force_lowlevel.
1766 * No parallel communication should occur while this counter is running,
1767 * since that will interfere with the dynamic load balancing.
1769 wallcycle_start(wcycle, ewcFORCE);
1771 if (bDoForces)
1773 /* Reset forces for which the virial is calculated separately:
1774 * PME/Ewald forces if necessary */
1775 if (fr->bF_NoVirSum)
1777 if (flags & GMX_FORCE_VIRIAL)
1779 fr->f_novirsum = fr->f_novirsum_alloc;
1780 if (fr->bDomDec)
1782 clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
1784 else
1786 clear_rvecs(homenr, fr->f_novirsum+start);
1789 else
1791 /* We are not calculating the pressure so we do not need
1792 * a separate array for forces that do not contribute
1793 * to the pressure.
1795 fr->f_novirsum = f;
1799 /* Clear the short- and long-range forces */
1800 clear_rvecs(fr->natoms_force_constr, f);
1802 clear_rvec(fr->vir_diag_posres);
1804 if (inputrec->bPull && pull_have_constraint(inputrec->pull_work))
1806 clear_pull_forces(inputrec->pull_work);
1809 /* update QMMMrec, if necessary */
1810 if (fr->bQMMM)
1812 update_QMMMrec(cr, fr, x, mdatoms, box, top);
1815 /* Compute the bonded and non-bonded energies and optionally forces */
1816 do_force_lowlevel(fr, inputrec, &(top->idef),
1817 cr, nrnb, wcycle, mdatoms,
1818 x, hist, f, enerd, fcd, top, fr->born,
1819 bBornRadii, box,
1820 inputrec->fepvals, lambda,
1821 graph, &(top->excls), fr->mu_tot,
1822 flags,
1823 &cycles_pme);
1825 cycles_force = wallcycle_stop(wcycle, ewcFORCE);
1827 if (ed)
1829 do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1832 if (DOMAINDECOMP(cr))
1834 dd_force_flop_stop(cr->dd, nrnb);
1835 if (wcycle)
1837 dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1841 if (bDoForces)
1843 if (inputrecElecField(inputrec))
1845 /* Compute forces due to electric field */
1846 calc_f_el(MASTER(cr) ? field : NULL,
1847 start, homenr, mdatoms->chargeA, fr->f_novirsum,
1848 inputrec->ex, inputrec->et, t);
1851 /* Communicate the forces */
1852 if (DOMAINDECOMP(cr))
1854 wallcycle_start(wcycle, ewcMOVEF);
1855 dd_move_f(cr->dd, f, fr->fshift);
1856 /* Do we need to communicate the separate force array
1857 * for terms that do not contribute to the single sum virial?
1858 * Position restraints and electric fields do not introduce
1859 * inter-cg forces, only full electrostatics methods do.
1860 * When we do not calculate the virial, fr->f_novirsum = f,
1861 * so we have already communicated these forces.
1863 if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
1864 (flags & GMX_FORCE_VIRIAL))
1866 dd_move_f(cr->dd, fr->f_novirsum, NULL);
1868 wallcycle_stop(wcycle, ewcMOVEF);
1871 /* If we have NoVirSum forces, but we do not calculate the virial,
1872 * we sum fr->f_novirsum=f later.
1874 if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
1876 wallcycle_start(wcycle, ewcVSITESPREAD);
1877 spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
1878 &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1879 wallcycle_stop(wcycle, ewcVSITESPREAD);
1882 if (flags & GMX_FORCE_VIRIAL)
1884 /* Calculation of the virial must be done after vsites! */
1885 calc_virial(0, mdatoms->homenr, x, f,
1886 vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1890 if (inputrec->bPull && pull_have_potential(inputrec->pull_work))
1892 pull_potential_wrapper(cr, inputrec, box, x,
1893 f, vir_force, mdatoms, enerd, lambda, t,
1894 wcycle);
1897 /* Add the forces from enforced rotation potentials (if any) */
1898 if (inputrec->bRot)
1900 wallcycle_start(wcycle, ewcROTadd);
1901 enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
1902 wallcycle_stop(wcycle, ewcROTadd);
1905 /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
1906 IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
1908 if (PAR(cr) && !(cr->duty & DUTY_PME))
1910 /* In case of node-splitting, the PP nodes receive the long-range
1911 * forces, virial and energy from the PME nodes here.
1913 pme_receive_force_ener(cr, wcycle, enerd, fr);
1916 if (bDoForces)
1918 post_process_forces(cr, step, nrnb, wcycle,
1919 top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1920 flags);
1923 if (flags & GMX_FORCE_ENERGY)
1925 /* Sum the potential energy terms from group contributions */
1926 sum_epot(&(enerd->grpp), enerd->term);
1928 if (!EI_TPI(inputrec->eI))
1930 checkPotentialEnergyValidity(enerd);
1936 void do_force(FILE *fplog, t_commrec *cr,
1937 t_inputrec *inputrec,
1938 gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1939 gmx_localtop_t *top,
1940 gmx_groups_t *groups,
1941 matrix box, rvec x[], history_t *hist,
1942 rvec f[],
1943 tensor vir_force,
1944 t_mdatoms *mdatoms,
1945 gmx_enerdata_t *enerd, t_fcdata *fcd,
1946 real *lambda, t_graph *graph,
1947 t_forcerec *fr,
1948 gmx_vsite_t *vsite, rvec mu_tot,
1949 double t, FILE *field, gmx_edsam_t ed,
1950 gmx_bool bBornRadii,
1951 int flags)
1953 /* modify force flag if not doing nonbonded */
1954 if (!fr->bNonbonded)
1956 flags &= ~GMX_FORCE_NONBONDED;
1959 switch (inputrec->cutoff_scheme)
1961 case ecutsVERLET:
1962 do_force_cutsVERLET(fplog, cr, inputrec,
1963 step, nrnb, wcycle,
1964 top,
1965 groups,
1966 box, x, hist,
1967 f, vir_force,
1968 mdatoms,
1969 enerd, fcd,
1970 lambda, graph,
1971 fr, fr->ic,
1972 vsite, mu_tot,
1973 t, field, ed,
1974 bBornRadii,
1975 flags);
1976 break;
1977 case ecutsGROUP:
1978 do_force_cutsGROUP(fplog, cr, inputrec,
1979 step, nrnb, wcycle,
1980 top,
1981 groups,
1982 box, x, hist,
1983 f, vir_force,
1984 mdatoms,
1985 enerd, fcd,
1986 lambda, graph,
1987 fr, vsite, mu_tot,
1988 t, field, ed,
1989 bBornRadii,
1990 flags);
1991 break;
1992 default:
1993 gmx_incons("Invalid cut-off scheme passed!");
1998 void do_constrain_first(FILE *fplog, gmx_constr_t constr,
1999 t_inputrec *ir, t_mdatoms *md,
2000 t_state *state, t_commrec *cr, t_nrnb *nrnb,
2001 t_forcerec *fr, gmx_localtop_t *top)
2003 int i, m, start, end;
2004 gmx_int64_t step;
2005 real dt = ir->delta_t;
2006 real dvdl_dum;
2007 rvec *savex;
2009 /* We need to allocate one element extra, since we might use
2010 * (unaligned) 4-wide SIMD loads to access rvec entries.
2012 snew(savex, state->natoms + 1);
2014 start = 0;
2015 end = md->homenr;
2017 if (debug)
2019 fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
2020 start, md->homenr, end);
2022 /* Do a first constrain to reset particles... */
2023 step = ir->init_step;
2024 if (fplog)
2026 char buf[STEPSTRSIZE];
2027 fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
2028 gmx_step_str(step, buf));
2030 dvdl_dum = 0;
2032 /* constrain the current position */
2033 constrain(NULL, TRUE, FALSE, constr, &(top->idef),
2034 ir, cr, step, 0, 1.0, md,
2035 state->x, state->x, NULL,
2036 fr->bMolPBC, state->box,
2037 state->lambda[efptBONDED], &dvdl_dum,
2038 NULL, NULL, nrnb, econqCoord);
2039 if (EI_VV(ir->eI))
2042 /* constrain the initial velocity, and save it */
2042 /* also may be useful if we need the ekin from the halfstep for velocity verlet */
2043 constrain(NULL, TRUE, FALSE, constr, &(top->idef),
2044 ir, cr, step, 0, 1.0, md,
2045 state->x, state->v, state->v,
2046 fr->bMolPBC, state->box,
2047 state->lambda[efptBONDED], &dvdl_dum,
2048 NULL, NULL, nrnb, econqVeloc);
2050 /* constrain the initial velocities at t-dt/2 */
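/* The trick below: the velocities are temporarily negated so that x + dt*v gives the
 * position one time step back; constraining those positions against the t=0
 * coordinates (passing state->v so it receives the constraint correction) yields
 * velocities consistent with the constraints at t = -dt/2, after which the
 * velocities are negated back. */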
2051 if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
2053 for (i = start; (i < end); i++)
2055 for (m = 0; (m < DIM); m++)
2057 /* Reverse the velocity */
2058 state->v[i][m] = -state->v[i][m];
2059 /* Store the position at t-dt in savex */
2060 savex[i][m] = state->x[i][m] + dt*state->v[i][m];
2063 /* Shake the positions at t=-dt with the positions at t=0
2064 * as reference coordinates.
2066 if (fplog)
2068 char buf[STEPSTRSIZE];
2069 fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
2070 gmx_step_str(step, buf));
2072 dvdl_dum = 0;
2073 constrain(NULL, TRUE, FALSE, constr, &(top->idef),
2074 ir, cr, step, -1, 1.0, md,
2075 state->x, savex, NULL,
2076 fr->bMolPBC, state->box,
2077 state->lambda[efptBONDED], &dvdl_dum,
2078 state->v, NULL, nrnb, econqCoord);
2080 for (i = start; i < end; i++)
2082 for (m = 0; m < DIM; m++)
2084 /* Re-reverse the velocities */
2085 state->v[i][m] = -state->v[i][m];
2089 sfree(savex);
2093 static void
2094 integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
2095 double *enerout, double *virout)
2097 double enersum, virsum;
2098 double invscale, invscale2, invscale3;
2099 double r, ea, eb, ec, pa, pb, pc, pd;
2100 double y0, f, g, h;
2101 int ri, offset;
2102 double tabfactor;
2104 invscale = 1.0/scale;
2105 invscale2 = invscale*invscale;
2106 invscale3 = invscale*invscale2;
2108 /* Following summation derived from cubic spline definition,
2109 * Numerical Recipes in C, second edition, p. 113-116. Exact for
2110 * the cubic spline. We first calculate the negative of the
2111 * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
2112 * add the more standard, abrupt cutoff correction to that result,
2113 * yielding the long-range correction for a switched function. We
2114 * perform both the pressure and energy loops at the same time for
2115 * simplicity, as the computational cost is low. */
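/* Per table bin the cubic spline V = y0 + f*t + g*t^2 + h*t^3 is integrated
 * analytically: enersum accumulates the r^2-weighted potential and virsum the
 * corresponding r^3-weighted derivative term, so the 4*pi factors below turn them
 * into the spherical-shell energy and virial tail corrections. */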
2117 if (offstart == 0)
2119 /* Since the dispersion table has been scaled down a factor
2120 * 6.0 and the repulsion a factor 12.0 to compensate for the
2121 * c6/c12 parameters inside nbfp[] being scaled up (to save
2122 * flops in kernels), we need to correct for this.
2124 tabfactor = 6.0;
2126 else
2128 tabfactor = 12.0;
2131 enersum = 0.0;
2132 virsum = 0.0;
2133 for (ri = rstart; ri < rend; ++ri)
2135 r = ri*invscale;
2136 ea = invscale3;
2137 eb = 2.0*invscale2*r;
2138 ec = invscale*r*r;
2140 pa = invscale3;
2141 pb = 3.0*invscale2*r;
2142 pc = 3.0*invscale*r*r;
2143 pd = r*r*r;
2145 /* this "8" is from the packing in the vdwtab array - perhaps
2146 should be defined? */
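/* Judging from the calls in calc_enervirdiff(), each table point stores
 * four spline coefficients for dispersion (offsets 0-3) followed by four
 * for repulsion (offsets 4-7), hence the stride of 8 and offstart being
 * either 0 or 4. */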
2148 offset = 8*ri + offstart;
2149 y0 = vdwtab[offset];
2150 f = vdwtab[offset+1];
2151 g = vdwtab[offset+2];
2152 h = vdwtab[offset+3];
2154 enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
2155 virsum += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
2157 *enerout = 4.0*M_PI*enersum*tabfactor;
2158 *virout = 4.0*M_PI*virsum*tabfactor;
2161 void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr)
2163 double eners[2], virs[2], enersum, virsum;
2164 double r0, rc3, rc9;
2165 int ri0, ri1, i;
2166 real scale, *vdwtab;
2168 fr->enershiftsix = 0;
2169 fr->enershifttwelve = 0;
2170 fr->enerdiffsix = 0;
2171 fr->enerdifftwelve = 0;
2172 fr->virdiffsix = 0;
2173 fr->virdifftwelve = 0;
2175 if (eDispCorr != edispcNO)
2177 for (i = 0; i < 2; i++)
2179 eners[i] = 0;
2180 virs[i] = 0;
2182 if ((fr->vdw_modifier == eintmodPOTSHIFT) ||
2183 (fr->vdw_modifier == eintmodPOTSWITCH) ||
2184 (fr->vdw_modifier == eintmodFORCESWITCH) ||
2185 (fr->vdwtype == evdwSHIFT) ||
2186 (fr->vdwtype == evdwSWITCH))
2188 if (((fr->vdw_modifier == eintmodPOTSWITCH) ||
2189 (fr->vdw_modifier == eintmodFORCESWITCH) ||
2190 (fr->vdwtype == evdwSWITCH)) && fr->rvdw_switch == 0)
2192 gmx_fatal(FARGS,
2193 "With dispersion correction rvdw-switch can not be zero "
2194 "for vdw-type = %s", evdw_names[fr->vdwtype]);
2197 /* TODO This code depends on the logic in tables.c that
2198 constructs the table layout, which should be made
2199 explicit in future cleanup. */
2200 GMX_ASSERT(fr->dispersionCorrectionTable->interaction == GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
2201 "Dispersion-correction code needs a table with both repulsion and dispersion terms");
2202 scale = fr->dispersionCorrectionTable->scale;
2203 vdwtab = fr->dispersionCorrectionTable->data;
2205 /* Round the cut-offs to exact table values for precision */
2206 ri0 = static_cast<int>(floor(fr->rvdw_switch*scale));
2207 ri1 = static_cast<int>(ceil(fr->rvdw*scale));
2209 /* The code below has some support for handling force-switching, i.e.
2210 * when the force (instead of potential) is switched over a limited
2211 * region. This leads to a constant shift in the potential inside the
2212 * switching region, which we can handle by adding a constant energy
2213 * term in the force-switch case just like when we do potential-shift.
2215 * For now this is not enabled, but to keep the functionality in the
2216 * code we check separately for switch and shift. When we do force-switch
2217 * the shifting point is rvdw_switch, while it is the cutoff when we
2218 * have a classical potential-shift.
2220 * For a pure potential-shift the potential has a constant shift
2221 * all the way out to the cutoff, and that is it. For other forms
2222 * we need to calculate the constant shift up to the point where we
2223 * start modifying the potential.
2225 ri0 = (fr->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0;
2227 r0 = ri0/scale;
2228 rc3 = r0*r0*r0;
2229 rc9 = rc3*rc3*rc3;
2231 if ((fr->vdw_modifier == eintmodFORCESWITCH) ||
2232 (fr->vdwtype == evdwSHIFT))
2234 /* Determine the constant energy shift below rvdw_switch.
2235 * Table has a scale factor since we have scaled it down to compensate
2236 * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
2238 fr->enershiftsix = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
2239 fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
2241 else if (fr->vdw_modifier == eintmodPOTSHIFT)
2243 fr->enershiftsix = (real)(-1.0/(rc3*rc3));
2244 fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
2247 /* Add the constant part from 0 to rvdw_switch.
2248 * This integration from 0 to rvdw_switch overcounts the number
2249 * of interactions by 1, as it also counts the self interaction.
2250 * We will correct for this later.
2252 eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
2253 eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
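/* That is, the constant shift times the volume (4/3) pi r0^3 of the
 * sphere inside the shift point (here rc3 is r0^3, not rvdw^3). */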
2255 /* Calculate the contribution in the range [r0,r1] where we
2256 * modify the potential. For a pure potential-shift modifier we will
2257 * have ri0==ri1, and there will not be any contribution here.
2259 for (i = 0; i < 2; i++)
2261 enersum = 0;
2262 virsum = 0;
2263 integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
2264 eners[i] -= enersum;
2265 virs[i] -= virsum;
2268 /* Alright: Above we compensated by REMOVING the parts outside r0
2269 * corresponding to the ideal VdW 1/r6 and 1/r12 potentials.
2271 * Regardless of whether r0 is the point where we start switching,
2272 * or the cutoff where we calculated the constant shift, we include
2273 * all the parts we are missing out to infinity from r0 by
2274 * calculating the analytical dispersion correction.
2276 eners[0] += -4.0*M_PI/(3.0*rc3);
2277 eners[1] += 4.0*M_PI/(9.0*rc9);
2278 virs[0] += 8.0*M_PI/rc3;
2279 virs[1] += -16.0*M_PI/(3.0*rc9);
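/* For reference, with g(r) = 1 these are the analytical tail integrals
 *   int_{r0}^inf 4 pi r^2 (-1/r^6)  dr = -4 pi / (3 r0^3)
 *   int_{r0}^inf 4 pi r^2 ( 1/r^12) dr =  4 pi / (9 r0^9)
 * while the corresponding virial integrals int_{r0}^inf 4 pi r^3 dV/dr dr
 * give 8 pi / r0^3 and -16 pi / (3 r0^9), respectively. */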
2281 else if (fr->vdwtype == evdwCUT ||
2282 EVDW_PME(fr->vdwtype) ||
2283 fr->vdwtype == evdwUSER)
2285 if (fr->vdwtype == evdwUSER && fplog)
2287 fprintf(fplog,
2288 "WARNING: using dispersion correction with user tables\n");
2291 /* Note that with LJ-PME, the dispersion correction is multiplied
2292 * by the difference between the actual C6 and the value of C6
2293 * that would produce the combination rule.
2294 * This means the normal energy and virial difference formulas
2295 * can be used here.
2298 rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
2299 rc9 = rc3*rc3*rc3;
2300 /* Contribution beyond the cut-off */
2301 eners[0] += -4.0*M_PI/(3.0*rc3);
2302 eners[1] += 4.0*M_PI/(9.0*rc9);
2303 if (fr->vdw_modifier == eintmodPOTSHIFT)
2305 /* Contribution within the cut-off */
2306 eners[0] += -4.0*M_PI/(3.0*rc3);
2307 eners[1] += 4.0*M_PI/(3.0*rc9);
2309 /* Contribution beyond the cut-off */
2310 virs[0] += 8.0*M_PI/rc3;
2311 virs[1] += -16.0*M_PI/(3.0*rc9);
2313 else
2315 gmx_fatal(FARGS,
2316 "Dispersion correction is not implemented for vdw-type = %s",
2317 evdw_names[fr->vdwtype]);
2320 /* When we deprecate the group kernels the code below can go too */
2321 if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
2323 /* Calculate self-interaction coefficient (assuming that
2324 * the reciprocal-space contribution is constant in the
2325 * region that contributes to the self-interaction).
2327 fr->enershiftsix = gmx::power6(fr->ewaldcoeff_lj) / 6.0;
2329 eners[0] += -gmx::power3(std::sqrt(M_PI)*fr->ewaldcoeff_lj)/3.0;
2330 virs[0] += gmx::power3(std::sqrt(M_PI)*fr->ewaldcoeff_lj);
2333 fr->enerdiffsix = eners[0];
2334 fr->enerdifftwelve = eners[1];
2335 /* The 0.5 is due to the Gromacs definition of the virial */
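/* (GROMACS defines the virial as Xi = -1/2 sum_ij r_ij (x) F_ij, hence
 * the extra 0.5 relative to the plain r^3 dV/dr integrals above.) */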
2336 fr->virdiffsix = 0.5*virs[0];
2337 fr->virdifftwelve = 0.5*virs[1];
2341 void calc_dispcorr(t_inputrec *ir, t_forcerec *fr,
2342 matrix box, real lambda, tensor pres, tensor virial,
2343 real *prescorr, real *enercorr, real *dvdlcorr)
2345 gmx_bool bCorrAll, bCorrPres;
2346 real dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
2347 int m;
2349 *prescorr = 0;
2350 *enercorr = 0;
2351 *dvdlcorr = 0;
2353 clear_mat(virial);
2354 clear_mat(pres);
2356 if (ir->eDispCorr != edispcNO)
2358 bCorrAll = (ir->eDispCorr == edispcAllEner ||
2359 ir->eDispCorr == edispcAllEnerPres);
2360 bCorrPres = (ir->eDispCorr == edispcEnerPres ||
2361 ir->eDispCorr == edispcAllEnerPres);
2363 invvol = 1/det(box);
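/* Roughly: the correction applies per pair beyond the cut-off, so in the
 * normal case we take ninter = N/2 "central" atoms, each seeing a
 * homogeneous partner density dens = N/V; with test-particle insertion
 * only the n_tpi inserted atoms count, interacting with the density of
 * the remaining atoms. */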
2364 if (fr->n_tpi)
2366 /* Only correct for the interactions with the inserted molecule */
2367 dens = (fr->numAtomsForDispersionCorrection - fr->n_tpi)*invvol;
2368 ninter = fr->n_tpi;
2370 else
2372 dens = fr->numAtomsForDispersionCorrection*invvol;
2373 ninter = 0.5*fr->numAtomsForDispersionCorrection;
2376 if (ir->efep == efepNO)
2378 avcsix = fr->avcsix[0];
2379 avctwelve = fr->avctwelve[0];
2381 else
2383 avcsix = (1 - lambda)*fr->avcsix[0] + lambda*fr->avcsix[1];
2384 avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
2387 enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
2388 *enercorr += avcsix*enerdiff;
2389 dvdlambda = 0.0;
2390 if (ir->efep != efepNO)
2392 dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
2394 if (bCorrAll)
2396 enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
2397 *enercorr += avctwelve*enerdiff;
2398 if (fr->efep != efepNO)
2400 dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
2404 if (bCorrPres)
2406 svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
2407 if (ir->eDispCorr == edispcAllEnerPres)
2409 svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
2411 /* The factor 2 is because of the Gromacs virial definition */
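/* In outline: the pressure tensor is P = (2/V) (Ekin - Xi), so adding
 * svir to each diagonal element of the virial changes the corresponding
 * pressure element by -2*svir/V; PRESFAC converts from internal units
 * (kJ mol^-1 nm^-3) to bar. */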
2412 spres = -2.0*invvol*svir*PRESFAC;
2414 for (m = 0; m < DIM; m++)
2416 virial[m][m] += svir;
2417 pres[m][m] += spres;
2419 *prescorr += spres;
2422 /* Can't currently control when it prints; for now, just print when debugging */
2423 if (debug)
2425 if (bCorrAll)
2427 fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
2428 avcsix, avctwelve);
2430 if (bCorrPres)
2432 fprintf(debug,
2433 "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
2434 *enercorr, spres, svir);
2436 else
2438 fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
2442 if (fr->efep != efepNO)
2444 *dvdlcorr += dvdlambda;
2449 void do_pbc_first(FILE *fplog, matrix box, t_forcerec *fr,
2450 t_graph *graph, rvec x[])
2452 if (fplog)
2454 fprintf(fplog, "Removing pbc first time\n");
2456 calc_shifts(box, fr->shift_vec);
2457 if (graph)
2459 mk_mshift(fplog, graph, fr->ePBC, box, x);
2460 if (gmx_debug_at)
2462 p_graph(debug, "do_pbc_first 1", graph);
2464 shift_self(graph, box, x);
2465 /* By doing an extra mk_mshift the molecules that are broken
2466 * because they were e.g. imported from other software
2467 * will be made whole again. Such are the healing powers
2468 * of GROMACS.
2470 mk_mshift(fplog, graph, fr->ePBC, box, x);
2471 if (gmx_debug_at)
2473 p_graph(debug, "do_pbc_first 2", graph);
2476 if (fplog)
2478 fprintf(fplog, "Done rmpbc\n");
2482 static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2483 const gmx_mtop_t *mtop, rvec x[],
2484 gmx_bool bFirst)
2486 t_graph *graph;
2487 int mb, as, mol;
2488 gmx_molblock_t *molb;
2490 if (bFirst && fplog)
2492 fprintf(fplog, "Removing pbc first time\n");
2495 snew(graph, 1);
2496 as = 0;
2497 for (mb = 0; mb < mtop->nmolblock; mb++)
2499 molb = &mtop->molblock[mb];
2500 if (molb->natoms_mol == 1 ||
2501 (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
2503 /* Just one atom or charge group in the molecule, no PBC required */
2504 as += molb->nmol*molb->natoms_mol;
2506 else
2508 /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
2509 mk_graph_ilist(NULL, mtop->moltype[molb->type].ilist,
2510 0, molb->natoms_mol, FALSE, FALSE, graph);
2512 for (mol = 0; mol < molb->nmol; mol++)
2514 mk_mshift(fplog, graph, ePBC, box, x+as);
2516 shift_self(graph, box, x+as);
2517 /* The molecule is whole now.
2518 * We don't need the second mk_mshift call as in do_pbc_first,
2519 * since we no longer need this graph.
2522 as += molb->natoms_mol;
2524 done_graph(graph);
2527 sfree(graph);
2530 void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
2531 const gmx_mtop_t *mtop, rvec x[])
2533 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
2536 void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2537 gmx_mtop_t *mtop, rvec x[])
2539 low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
2542 void put_atoms_in_box_omp(int ePBC, const matrix box, int natoms, rvec x[])
2544 int t, nth;
2545 nth = gmx_omp_nthreads_get(emntDefault);
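/* Each thread handles the contiguous atom range
 * [natoms*t/nth, natoms*(t+1)/nth), so together the threads cover every
 * atom exactly once. */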
2547 #pragma omp parallel for num_threads(nth) schedule(static)
2548 for (t = 0; t < nth; t++)
2552 int offset, len;
2554 offset = (natoms*t )/nth;
2555 len = (natoms*(t + 1))/nth - offset;
2556 put_atoms_in_box(ePBC, box, len, x + offset);
2558 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
2562 // TODO This can be cleaned up a lot, and moved back to runner.cpp
2563 void finish_run(FILE *fplog, t_commrec *cr,
2564 t_inputrec *inputrec,
2565 t_nrnb nrnb[], gmx_wallcycle_t wcycle,
2566 gmx_walltime_accounting_t walltime_accounting,
2567 nonbonded_verlet_t *nbv,
2568 gmx_bool bWriteStat)
2570 t_nrnb *nrnb_tot = NULL;
2571 double delta_t = 0;
2572 double nbfs = 0, mflop = 0;
2573 double elapsed_time,
2574 elapsed_time_over_all_ranks,
2575 elapsed_time_over_all_threads,
2576 elapsed_time_over_all_threads_over_all_ranks;
2577 /* Control whether it is valid to print a report. Only the
2578 simulation master may print, but it should not do so if the run
2579 terminated e.g. before a scheduled reset step. This is
2580 complicated by the fact that PME ranks are unaware of the
2581 reason why they were sent a pmerecvqxFINISH. To avoid
2582 communication deadlocks, we always do the communication for the
2583 report, even when we have decided not to write it; the extra
2584 time this takes does not matter when we are not going to report
2585 on the simulation performance anyway. */
2586 bool printReport = SIMMASTER(cr);
2588 if (!walltime_accounting_get_valid_finish(walltime_accounting))
2590 md_print_warn(cr, fplog,
2591 "Simulation ended prematurely, no performance report will be written.");
2592 printReport = false;
2595 if (cr->nnodes > 1)
2597 snew(nrnb_tot, 1);
2598 #if GMX_MPI
2599 MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
2600 cr->mpi_comm_mysim);
2601 #endif
2603 else
2605 nrnb_tot = nrnb;
2608 elapsed_time = walltime_accounting_get_elapsed_time(walltime_accounting);
2609 elapsed_time_over_all_ranks = elapsed_time;
2610 elapsed_time_over_all_threads = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
2611 elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
2612 #if GMX_MPI
2613 if (cr->nnodes > 1)
2615 /* reduce elapsed_time over all MPI ranks in the current simulation */
2616 MPI_Allreduce(&elapsed_time,
2617 &elapsed_time_over_all_ranks,
2618 1, MPI_DOUBLE, MPI_SUM,
2619 cr->mpi_comm_mysim);
2620 elapsed_time_over_all_ranks /= cr->nnodes;
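/* i.e. elapsed_time_over_all_ranks is now the average wall time per rank */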
2621 /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
2622 * current simulation. */
2623 MPI_Allreduce(&elapsed_time_over_all_threads,
2624 &elapsed_time_over_all_threads_over_all_ranks,
2625 1, MPI_DOUBLE, MPI_SUM,
2626 cr->mpi_comm_mysim);
2628 #endif
2630 if (printReport)
2632 print_flop(fplog, nrnb_tot, &nbfs, &mflop);
2634 if (cr->nnodes > 1)
2636 sfree(nrnb_tot);
2639 if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
2641 print_dd_statistics(cr, inputrec, fplog);
2644 /* TODO Move the responsibility for any scaling by thread counts
2645 * to the code that handled the thread region, so that there's a
2646 * mechanism to keep cycle counting working during the transition
2647 * to task parallelism. */
2648 int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
2649 int nthreads_pme = gmx_omp_nthreads_get(emntPME);
2650 wallcycle_scale_by_num_threads(wcycle, cr->duty == DUTY_PME, nthreads_pp, nthreads_pme);
2651 auto cycle_sum(wallcycle_sum(cr, wcycle));
2653 if (printReport)
2655 struct gmx_wallclock_gpu_t* gputimes = use_GPU(nbv) ? nbnxn_gpu_get_timings(nbv->gpu_nbv) : NULL;
2657 wallcycle_print(fplog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
2658 elapsed_time_over_all_ranks,
2659 wcycle, cycle_sum, gputimes);
2661 if (EI_DYNAMICS(inputrec->eI))
2663 delta_t = inputrec->delta_t;
2666 if (fplog)
2668 print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
2669 elapsed_time_over_all_ranks,
2670 walltime_accounting_get_nsteps_done(walltime_accounting),
2671 delta_t, nbfs, mflop);
2673 if (bWriteStat)
2675 print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
2676 elapsed_time_over_all_ranks,
2677 walltime_accounting_get_nsteps_done(walltime_accounting),
2678 delta_t, nbfs, mflop);
2683 extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
2685 /* this function works, but could probably use a logic rewrite to keep all the different
2686 types of efep straight. */
2688 int i;
2689 t_lambda *fep = ir->fepvals;
2691 if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
2693 for (i = 0; i < efptNR; i++)
2695 lambda[i] = 0.0;
2696 if (lam0)
2698 lam0[i] = 0.0;
2701 return;
2703 else
2705 *fep_state = fep->init_fep_state; /* this might overwrite the checkpointed
2706 value if a checkpoint is set -- a kludge is in place for now
2707 to prevent this. */
2708 for (i = 0; i < efptNR; i++)
2710 /* overwrite lambda state with init_lambda for now for backwards compatibility */
2711 if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
2713 lambda[i] = fep->init_lambda;
2714 if (lam0)
2716 lam0[i] = lambda[i];
2719 else
2721 lambda[i] = fep->all_lambda[i][*fep_state];
2722 if (lam0)
2724 lam0[i] = lambda[i];
2728 if (ir->bSimTemp)
2730 /* need to rescale control temperatures to match current state */
2731 for (i = 0; i < ir->opts.ngtc; i++)
2733 if (ir->opts.ref_t[i] > 0)
2735 ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
2742 /* Write the current lambda components to the log */
2742 if (fplog != NULL)
2744 fprintf(fplog, "Initial vector of lambda components:[ ");
2745 for (i = 0; i < efptNR; i++)
2747 fprintf(fplog, "%10.4f ", lambda[i]);
2749 fprintf(fplog, "]\n");
2751 return;
2755 void init_md(FILE *fplog,
2756 t_commrec *cr, t_inputrec *ir, const gmx_output_env_t *oenv,
2757 double *t, double *t0,
2758 real *lambda, int *fep_state, double *lam0,
2759 t_nrnb *nrnb, gmx_mtop_t *mtop,
2760 gmx_update_t **upd,
2761 int nfile, const t_filenm fnm[],
2762 gmx_mdoutf_t *outf, t_mdebin **mdebin,
2763 tensor force_vir, tensor shake_vir, rvec mu_tot,
2764 gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags,
2765 gmx_wallcycle_t wcycle)
2767 int i;
2769 /* Initial values */
2770 *t = *t0 = ir->init_t;
2772 *bSimAnn = FALSE;
2773 for (i = 0; i < ir->opts.ngtc; i++)
2775 /* set bSimAnn if any group is being annealed */
2776 if (ir->opts.annealing[i] != eannNO)
2778 *bSimAnn = TRUE;
2782 /* Initialize lambda variables */
2783 initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
2785 // TODO upd is never NULL in practice, but the analysers don't know that
2786 if (upd)
2788 *upd = init_update(ir);
2790 if (*bSimAnn)
2792 update_annealing_target_temp(ir, ir->init_t, upd ? *upd : NULL);
2795 if (vcm != NULL)
2797 *vcm = init_vcm(fplog, &mtop->groups, ir);
2800 if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
2802 if (ir->etc == etcBERENDSEN)
2804 please_cite(fplog, "Berendsen84a");
2806 if (ir->etc == etcVRESCALE)
2808 please_cite(fplog, "Bussi2007a");
2810 if (ir->eI == eiSD1)
2812 please_cite(fplog, "Goga2012");
2815 if ((ir->et[XX].n > 0) || (ir->et[YY].n > 0) || (ir->et[ZZ].n > 0))
2817 please_cite(fplog, "Caleman2008a");
2819 init_nrnb(nrnb);
2821 if (nfile != -1)
2823 *outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, ir, mtop, oenv, wcycle);
2825 *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : mdoutf_get_fp_ene(*outf),
2826 mtop, ir, mdoutf_get_fp_dhdl(*outf));
2829 /* Initialize variables */
2830 clear_mat(force_vir);
2831 clear_mat(shake_vir);
2832 clear_rvec(mu_tot);