1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
41 #include<catamount/dclock.h>
47 #ifdef HAVE_SYS_TIME_H
60 #include "chargegroup.h"
83 #include "pull_rotation.h"
84 #include "gmx_random.h"
85 #include "mpelogging.h"
88 #include "gmx_wallcycle.h"
90 #include "nbnxn_search.h"
91 #include "nbnxn_kernels/nbnxn_kernel_ref.h"
92 #include "nbnxn_kernels/nbnxn_kernel_x86_simd128.h"
93 #include "nbnxn_kernels/nbnxn_kernel_x86_simd256.h"
94 #include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
106 #include "nbnxn_cuda_data_mgmt.h"
107 #include "nbnxn_cuda/nbnxn_cuda.h"
110 typedef struct gmx_timeprint
{
115 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
117 gmx_ctime_r(const time_t *clock
,char *buf
, int n
);
123 #ifdef HAVE_GETTIMEOFDAY
127 gettimeofday(&t
,NULL
);
129 seconds
= (double) t
.tv_sec
+ 1e-6*(double)t
.tv_usec
;
135 seconds
= time(NULL
);
142 #define difftime(end,start) ((double)(end)-(double)(start))
144 void print_time(FILE *out
,gmx_runtime_t
*runtime
,gmx_large_int_t step
,
145 t_inputrec
*ir
, t_commrec
*cr
)
148 char timebuf
[STRLEN
];
152 #ifndef GMX_THREAD_MPI
158 fprintf(out
,"step %s",gmx_step_str(step
,buf
));
159 if ((step
>= ir
->nstlist
))
161 runtime
->last
= gmx_gettime();
162 dt
= difftime(runtime
->last
,runtime
->real
);
163 runtime
->time_per_step
= dt
/(step
- ir
->init_step
+ 1);
165 dt
= (ir
->nsteps
+ ir
->init_step
- step
)*runtime
->time_per_step
;
171 finish
= (time_t) (runtime
->last
+ dt
);
172 gmx_ctime_r(&finish
,timebuf
,STRLEN
);
173 sprintf(buf
,"%s",timebuf
);
174 buf
[strlen(buf
)-1]='\0';
175 fprintf(out
,", will finish %s",buf
);
178 fprintf(out
,", remaining runtime: %5d s ",(int)dt
);
182 fprintf(out
," performance: %.1f ns/day ",
183 ir
->delta_t
/1000*24*60*60/runtime
->time_per_step
);
186 #ifndef GMX_THREAD_MPI
200 static double set_proctime(gmx_runtime_t
*runtime
)
206 prev
= runtime
->proc
;
207 runtime
->proc
= dclock();
209 diff
= runtime
->proc
- prev
;
213 prev
= runtime
->proc
;
214 runtime
->proc
= clock();
216 diff
= (double)(runtime
->proc
- prev
)/(double)CLOCKS_PER_SEC
;
220 /* The counter has probably looped, ignore this data */
227 void runtime_start(gmx_runtime_t
*runtime
)
229 runtime
->real
= gmx_gettime();
231 set_proctime(runtime
);
232 runtime
->realtime
= 0;
233 runtime
->proctime
= 0;
235 runtime
->time_per_step
= 0;
238 void runtime_end(gmx_runtime_t
*runtime
)
244 runtime
->proctime
+= set_proctime(runtime
);
245 runtime
->realtime
= now
- runtime
->real
;
249 void runtime_upd_proc(gmx_runtime_t
*runtime
)
251 runtime
->proctime
+= set_proctime(runtime
);
254 void print_date_and_time(FILE *fplog
,int nodeid
,const char *title
,
255 const gmx_runtime_t
*runtime
)
258 char timebuf
[STRLEN
];
259 char time_string
[STRLEN
];
266 tmptime
= (time_t) runtime
->real
;
267 gmx_ctime_r(&tmptime
,timebuf
,STRLEN
);
271 tmptime
= (time_t) gmx_gettime();
272 gmx_ctime_r(&tmptime
,timebuf
,STRLEN
);
274 for(i
=0; timebuf
[i
]>=' '; i
++)
276 time_string
[i
]=timebuf
[i
];
280 fprintf(fplog
,"%s on node %d %s\n",title
,nodeid
,time_string
);
284 static void sum_forces(int start
,int end
,rvec f
[],rvec flr
[])
289 pr_rvecs(debug
,0,"fsr",f
+start
,end
-start
);
290 pr_rvecs(debug
,0,"flr",flr
+start
,end
-start
);
292 for(i
=start
; (i
<end
); i
++)
293 rvec_inc(f
[i
],flr
[i
]);
297 * calc_f_el calculates forces due to an electric field.
299 * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
301 * Et[] contains the parameters for the time dependent
302 * part of the field (not yet used).
303 * Ex[] contains the parameters for
304 * the spatial dependent part of the field. You can have cool periodic
305 * fields in principle, but only a constant field is supported
307 * The function should return the energy due to the electric field
308 * (if any) but for now returns 0.
311 * There can be problems with the virial.
312 * Since the field is not self-consistent this is unavoidable.
313 * For neutral molecules the virial is correct within this approximation.
314 * For neutral systems with many charged molecules the error is small.
315 * But for systems with a net charge or a few charged molecules
316 * the error can be significant when the field is high.
317 * Solution: implement a self-consitent electric field into PME.
319 static void calc_f_el(FILE *fp
,int start
,int homenr
,
320 real charge
[],rvec x
[],rvec f
[],
321 t_cosines Ex
[],t_cosines Et
[],double t
)
327 for(m
=0; (m
<DIM
); m
++)
334 Ext
[m
] = cos(Et
[m
].a
[0]*(t
-t0
))*exp(-sqr(t
-t0
)/(2.0*sqr(Et
[m
].a
[2])));
338 Ext
[m
] = cos(Et
[m
].a
[0]*t
);
347 /* Convert the field strength from V/nm to MD-units */
348 Ext
[m
] *= Ex
[m
].a
[0]*FIELDFAC
;
349 for(i
=start
; (i
<start
+homenr
); i
++)
350 f
[i
][m
] += charge
[i
]*Ext
[m
];
359 fprintf(fp
,"%10g %10g %10g %10g #FIELD\n",t
,
360 Ext
[XX
]/FIELDFAC
,Ext
[YY
]/FIELDFAC
,Ext
[ZZ
]/FIELDFAC
);
364 static void calc_virial(FILE *fplog
,int start
,int homenr
,rvec x
[],rvec f
[],
365 tensor vir_part
,t_graph
*graph
,matrix box
,
366 t_nrnb
*nrnb
,const t_forcerec
*fr
,int ePBC
)
371 /* The short-range virial from surrounding boxes */
373 calc_vir(fplog
,SHIFTS
,fr
->shift_vec
,fr
->fshift
,vir_part
,ePBC
==epbcSCREW
,box
);
374 inc_nrnb(nrnb
,eNR_VIRIAL
,SHIFTS
);
376 /* Calculate partial virial, for local atoms only, based on short range.
377 * Total virial is computed in global_stat, called from do_md
379 f_calc_vir(fplog
,start
,start
+homenr
,x
,f
,vir_part
,graph
,box
);
380 inc_nrnb(nrnb
,eNR_VIRIAL
,homenr
);
382 /* Add position restraint contribution */
383 for(i
=0; i
<DIM
; i
++) {
384 vir_part
[i
][i
] += fr
->vir_diag_posres
[i
];
387 /* Add wall contribution */
388 for(i
=0; i
<DIM
; i
++) {
389 vir_part
[i
][ZZ
] += fr
->vir_wall_z
[i
];
393 pr_rvecs(debug
,0,"vir_part",vir_part
,DIM
);
396 static void posres_wrapper(FILE *fplog
,
404 gmx_enerdata_t
*enerd
,
412 /* Position restraints always require full pbc */
413 set_pbc(&pbc
,ir
->ePBC
,box
);
415 v
= posres(top
->idef
.il
[F_POSRES
].nr
,top
->idef
.il
[F_POSRES
].iatoms
,
416 top
->idef
.iparams_posres
,
417 (const rvec
*)x
,fr
->f_novirsum
,fr
->vir_diag_posres
,
418 ir
->ePBC
==epbcNONE
? NULL
: &pbc
,
419 lambda
[efptRESTRAINT
],&dvdl
,
420 fr
->rc_scaling
,fr
->ePBC
,fr
->posres_com
,fr
->posres_comB
);
423 fprintf(fplog
,sepdvdlformat
,
424 interaction_function
[F_POSRES
].longname
,v
,dvdl
);
426 enerd
->term
[F_POSRES
] += v
;
427 /* If just the force constant changes, the FEP term is linear,
428 * but if k changes, it is not.
430 enerd
->dvdl_nonlin
[efptRESTRAINT
] += dvdl
;
431 inc_nrnb(nrnb
,eNR_POSRES
,top
->idef
.il
[F_POSRES
].nr
/2);
433 if ((ir
->fepvals
->n_lambda
> 0) && (flags
& GMX_FORCE_DHDL
))
435 for(i
=0; i
<enerd
->n_lambda
; i
++)
437 real dvdl_dum
,lambda_dum
;
439 lambda_dum
= (i
==0 ? lambda
[efptRESTRAINT
] : ir
->fepvals
->all_lambda
[efptRESTRAINT
][i
-1]);
440 v
= posres(top
->idef
.il
[F_POSRES
].nr
,top
->idef
.il
[F_POSRES
].iatoms
,
441 top
->idef
.iparams_posres
,
442 (const rvec
*)x
,NULL
,NULL
,
443 ir
->ePBC
==epbcNONE
? NULL
: &pbc
,lambda_dum
,&dvdl
,
444 fr
->rc_scaling
,fr
->ePBC
,fr
->posres_com
,fr
->posres_comB
);
445 enerd
->enerpart_lambda
[i
] += v
;
450 static void pull_potential_wrapper(FILE *fplog
,
458 gmx_enerdata_t
*enerd
,
465 /* Calculate the center of mass forces, this requires communication,
466 * which is why pull_potential is called close to other communication.
467 * The virial contribution is calculated directly,
468 * which is why we call pull_potential after calc_virial.
470 set_pbc(&pbc
,ir
->ePBC
,box
);
472 enerd
->term
[F_COM_PULL
] +=
473 pull_potential(ir
->ePull
,ir
->pull
,mdatoms
,&pbc
,
474 cr
,t
,lambda
[efptRESTRAINT
],x
,f
,vir_force
,&dvdl
);
477 fprintf(fplog
,sepdvdlformat
,"Com pull",enerd
->term
[F_COM_PULL
],dvdl
);
479 enerd
->dvdl_lin
[efptRESTRAINT
] += dvdl
;
482 static void pme_receive_force_ener(FILE *fplog
,
485 gmx_wallcycle_t wcycle
,
486 gmx_enerdata_t
*enerd
,
490 float cycles_ppdpme
,cycles_seppme
;
492 cycles_ppdpme
= wallcycle_stop(wcycle
,ewcPPDURINGPME
);
493 dd_cycles_add(cr
->dd
,cycles_ppdpme
,ddCyclPPduringPME
);
495 /* In case of node-splitting, the PP nodes receive the long-range
496 * forces, virial and energy from the PME nodes here.
498 wallcycle_start(wcycle
,ewcPP_PMEWAITRECVF
);
500 gmx_pme_receive_f(cr
,fr
->f_novirsum
,fr
->vir_el_recip
,&e
,&dvdl
,
504 fprintf(fplog
,sepdvdlformat
,"PME mesh",e
,dvdl
);
506 enerd
->term
[F_COUL_RECIP
] += e
;
507 enerd
->dvdl_lin
[efptCOUL
] += dvdl
;
510 dd_cycles_add(cr
->dd
,cycles_seppme
,ddCyclPME
);
512 wallcycle_stop(wcycle
,ewcPP_PMEWAITRECVF
);
515 static void print_large_forces(FILE *fp
,t_mdatoms
*md
,t_commrec
*cr
,
516 gmx_large_int_t step
,real pforce
,rvec
*x
,rvec
*f
)
520 char buf
[STEPSTRSIZE
];
523 for(i
=md
->start
; i
<md
->start
+md
->homenr
; i
++) {
525 /* We also catch NAN, if the compiler does not optimize this away. */
526 if (fn2
>= pf2
|| fn2
!= fn2
) {
527 fprintf(fp
,"step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
528 gmx_step_str(step
,buf
),
529 ddglatnr(cr
->dd
,i
),x
[i
][XX
],x
[i
][YY
],x
[i
][ZZ
],sqrt(fn2
));
534 static void post_process_forces(FILE *fplog
,
536 gmx_large_int_t step
,
537 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
544 t_forcerec
*fr
,gmx_vsite_t
*vsite
,
551 /* Spread the mesh force on virtual sites to the other particles...
552 * This is parallellized. MPI communication is performed
553 * if the constructing atoms aren't local.
555 wallcycle_start(wcycle
,ewcVSITESPREAD
);
556 spread_vsite_f(fplog
,vsite
,x
,fr
->f_novirsum
,NULL
,
557 (flags
& GMX_FORCE_VIRIAL
),fr
->vir_el_recip
,
559 &top
->idef
,fr
->ePBC
,fr
->bMolPBC
,graph
,box
,cr
);
560 wallcycle_stop(wcycle
,ewcVSITESPREAD
);
562 if (flags
& GMX_FORCE_VIRIAL
)
564 /* Now add the forces, this is local */
567 sum_forces(0,fr
->f_novirsum_n
,f
,fr
->f_novirsum
);
571 sum_forces(mdatoms
->start
,mdatoms
->start
+mdatoms
->homenr
,
574 if (EEL_FULL(fr
->eeltype
))
576 /* Add the mesh contribution to the virial */
577 m_add(vir_force
,fr
->vir_el_recip
,vir_force
);
581 pr_rvecs(debug
,0,"vir_force",vir_force
,DIM
);
586 if (fr
->print_force
>= 0)
588 print_large_forces(stderr
,mdatoms
,cr
,step
,fr
->print_force
,x
,f
);
592 static void do_nb_verlet(t_forcerec
*fr
,
593 interaction_const_t
*ic
,
594 gmx_enerdata_t
*enerd
,
595 int flags
, int ilocality
,
598 gmx_wallcycle_t wcycle
)
600 int nnbl
, kernel_type
, sh_e
;
602 nonbonded_verlet_group_t
*nbvg
;
604 if (!(flags
& GMX_FORCE_NONBONDED
))
606 /* skip non-bonded calculation */
610 nbvg
= &fr
->nbv
->grp
[ilocality
];
612 /* CUDA kernel launch overhead is already timed separately */
613 if (fr
->cutoff_scheme
!= ecutsVERLET
)
615 gmx_incons("Invalid cut-off scheme passed!");
618 if (nbvg
->kernel_type
!= nbk8x8x8_CUDA
)
620 wallcycle_sub_start(wcycle
, ewcsNONBONDED
);
622 switch (nbvg
->kernel_type
)
625 nbnxn_kernel_ref(&nbvg
->nbl_lists
,
631 enerd
->grpp
.ener
[egCOULSR
],
633 enerd
->grpp
.ener
[egBHAMSR
] :
634 enerd
->grpp
.ener
[egLJSR
]);
637 case nbk4xN_X86_SIMD128
:
638 nbnxn_kernel_x86_simd128(&nbvg
->nbl_lists
,
644 enerd
->grpp
.ener
[egCOULSR
],
646 enerd
->grpp
.ener
[egBHAMSR
] :
647 enerd
->grpp
.ener
[egLJSR
]);
649 case nbk4xN_X86_SIMD256
:
650 nbnxn_kernel_x86_simd256(&nbvg
->nbl_lists
,
656 enerd
->grpp
.ener
[egCOULSR
],
658 enerd
->grpp
.ener
[egBHAMSR
] :
659 enerd
->grpp
.ener
[egLJSR
]);
663 nbnxn_cuda_launch_kernel(fr
->nbv
->cu_nbv
, nbvg
->nbat
, flags
, ilocality
);
666 case nbk8x8x8_PlainC
:
667 nbnxn_kernel_gpu_ref(nbvg
->nbl_lists
.nbl
[0],
672 nbvg
->nbat
->out
[0].f
,
674 enerd
->grpp
.ener
[egCOULSR
],
676 enerd
->grpp
.ener
[egBHAMSR
] :
677 enerd
->grpp
.ener
[egLJSR
]);
681 gmx_incons("Invalid nonbonded kernel type passed!");
684 if (nbvg
->kernel_type
!= nbk8x8x8_CUDA
)
686 wallcycle_sub_stop(wcycle
, ewcsNONBONDED
);
689 /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
690 sh_e
= ((flags
& GMX_FORCE_ENERGY
) ? 1 : 0);
692 ((EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
) ?
693 eNR_NBNXN_LJ_RF
: eNR_NBNXN_LJ_TAB
) + sh_e
,
694 nbvg
->nbl_lists
.natpair_ljq
);
695 inc_nrnb(nrnb
,eNR_NBNXN_LJ
+sh_e
,nbvg
->nbl_lists
.natpair_lj
);
697 ((EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
) ?
698 eNR_NBNXN_RF
: eNR_NBNXN_TAB
)+sh_e
,
699 nbvg
->nbl_lists
.natpair_q
);
702 void do_force_cutsVERLET(FILE *fplog
,t_commrec
*cr
,
703 t_inputrec
*inputrec
,
704 gmx_large_int_t step
,t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
707 gmx_groups_t
*groups
,
708 matrix box
,rvec x
[],history_t
*hist
,
712 gmx_enerdata_t
*enerd
,t_fcdata
*fcd
,
713 real
*lambda
,t_graph
*graph
,
714 t_forcerec
*fr
, interaction_const_t
*ic
,
715 gmx_vsite_t
*vsite
,rvec mu_tot
,
716 double t
,FILE *field
,gmx_edsam_t ed
,
724 gmx_bool bSepDVDL
,bStateChanged
,bNS
,bFillGrid
,bCalcCGCM
,bBS
;
725 gmx_bool bDoLongRange
,bDoForces
,bSepLRF
,bUseGPU
,bUseOrEmulGPU
;
726 gmx_bool bDiffKernels
=FALSE
;
730 float cycles_pme
,cycles_force
;
731 nonbonded_verlet_t
*nbv
;
735 nb_kernel_type
= fr
->nbv
->grp
[0].kernel_type
;
737 start
= mdatoms
->start
;
738 homenr
= mdatoms
->homenr
;
740 bSepDVDL
= (fr
->bSepDVDL
&& do_per_step(step
,inputrec
->nstlog
));
742 clear_mat(vir_force
);
745 if (DOMAINDECOMP(cr
))
747 cg1
= cr
->dd
->ncg_tot
;
758 bStateChanged
= (flags
& GMX_FORCE_STATECHANGED
);
759 bNS
= (flags
& GMX_FORCE_NS
) && (fr
->bAllvsAll
==FALSE
);
760 bFillGrid
= (bNS
&& bStateChanged
);
761 bCalcCGCM
= (bFillGrid
&& !DOMAINDECOMP(cr
));
762 bDoLongRange
= (fr
->bTwinRange
&& bNS
&& (flags
& GMX_FORCE_DOLR
));
763 bDoForces
= (flags
& GMX_FORCE_FORCES
);
764 bSepLRF
= (bDoLongRange
&& bDoForces
&& (flags
& GMX_FORCE_SEPLRF
));
765 bUseGPU
= fr
->nbv
->bUseGPU
;
766 bUseOrEmulGPU
= bUseGPU
|| (nbv
->grp
[0].kernel_type
== nbk8x8x8_PlainC
);
770 update_forcerec(fplog
,fr
,box
);
772 if (NEED_MUTOT(*inputrec
))
774 /* Calculate total (local) dipole moment in a temporary common array.
775 * This makes it possible to sum them over nodes faster.
777 calc_mu(start
,homenr
,
778 x
,mdatoms
->chargeA
,mdatoms
->chargeB
,mdatoms
->nChargePerturbed
,
783 if (fr
->ePBC
!= epbcNONE
) {
784 /* Compute shift vectors every step,
785 * because of pressure coupling or box deformation!
787 if ((flags
& GMX_FORCE_DYNAMICBOX
) && bStateChanged
)
788 calc_shifts(box
,fr
->shift_vec
);
791 put_atoms_in_box_omp(fr
->ePBC
,box
,homenr
,x
);
792 inc_nrnb(nrnb
,eNR_SHIFTX
,homenr
);
794 else if (EI_ENERGY_MINIMIZATION(inputrec
->eI
) && graph
) {
795 unshift_self(graph
,box
,x
);
799 nbnxn_atomdata_copy_shiftvec(flags
& GMX_FORCE_DYNAMICBOX
,
800 fr
->shift_vec
,nbv
->grp
[0].nbat
);
803 if (!(cr
->duty
& DUTY_PME
)) {
804 /* Send particle coordinates to the pme nodes.
805 * Since this is only implemented for domain decomposition
806 * and domain decomposition does not use the graph,
807 * we do not need to worry about shifting.
810 wallcycle_start(wcycle
,ewcPP_PMESENDX
);
811 GMX_MPE_LOG(ev_send_coordinates_start
);
813 bBS
= (inputrec
->nwall
== 2);
816 svmul(inputrec
->wall_ewald_zfac
,boxs
[ZZ
],boxs
[ZZ
]);
819 gmx_pme_send_x(cr
,bBS
? boxs
: box
,x
,
820 mdatoms
->nChargePerturbed
,lambda
[efptCOUL
],
821 (flags
& (GMX_FORCE_VIRIAL
| GMX_FORCE_ENERGY
)),step
);
823 GMX_MPE_LOG(ev_send_coordinates_finish
);
824 wallcycle_stop(wcycle
,ewcPP_PMESENDX
);
828 /* do gridding for pair search */
831 if (graph
&& bStateChanged
)
833 /* Calculate intramolecular shift vectors to make molecules whole */
834 mk_mshift(fplog
,graph
,fr
->ePBC
,box
,x
);
838 box_diag
[XX
] = box
[XX
][XX
];
839 box_diag
[YY
] = box
[YY
][YY
];
840 box_diag
[ZZ
] = box
[ZZ
][ZZ
];
842 wallcycle_start(wcycle
,ewcNS
);
845 wallcycle_sub_start(wcycle
,ewcsNBS_GRID_LOCAL
);
846 nbnxn_put_on_grid(nbv
->nbs
,fr
->ePBC
,box
,
848 0,mdatoms
->homenr
,-1,fr
->cginfo
,x
,
850 nbv
->grp
[eintLocal
].kernel_type
,
851 nbv
->grp
[eintLocal
].nbat
);
852 wallcycle_sub_stop(wcycle
,ewcsNBS_GRID_LOCAL
);
856 wallcycle_sub_start(wcycle
,ewcsNBS_GRID_NONLOCAL
);
857 nbnxn_put_on_grid_nonlocal(nbv
->nbs
,domdec_zones(cr
->dd
),
859 nbv
->grp
[eintNonlocal
].kernel_type
,
860 nbv
->grp
[eintNonlocal
].nbat
);
861 wallcycle_sub_stop(wcycle
,ewcsNBS_GRID_NONLOCAL
);
864 if (nbv
->ngrp
== 1 ||
865 nbv
->grp
[eintNonlocal
].nbat
== nbv
->grp
[eintLocal
].nbat
)
867 nbnxn_atomdata_set(nbv
->grp
[eintLocal
].nbat
,eatAll
,
868 nbv
->nbs
,mdatoms
,fr
->cginfo
);
872 nbnxn_atomdata_set(nbv
->grp
[eintLocal
].nbat
,eatLocal
,
873 nbv
->nbs
,mdatoms
,fr
->cginfo
);
874 nbnxn_atomdata_set(nbv
->grp
[eintNonlocal
].nbat
,eatAll
,
875 nbv
->nbs
,mdatoms
,fr
->cginfo
);
877 wallcycle_stop(wcycle
, ewcNS
);
880 /* initialize the GPU atom data and copy shift vector */
885 wallcycle_start_nocount(wcycle
, ewcLAUNCH_GPU_NB
);
886 nbnxn_cuda_init_atomdata(nbv
->cu_nbv
, nbv
->grp
[eintLocal
].nbat
);
887 wallcycle_stop(wcycle
, ewcLAUNCH_GPU_NB
);
890 wallcycle_start_nocount(wcycle
, ewcLAUNCH_GPU_NB
);
891 nbnxn_cuda_upload_shiftvec(nbv
->cu_nbv
, nbv
->grp
[eintLocal
].nbat
);
892 wallcycle_stop(wcycle
, ewcLAUNCH_GPU_NB
);
895 /* do local pair search */
898 wallcycle_start_nocount(wcycle
,ewcNS
);
899 wallcycle_sub_start(wcycle
,ewcsNBS_SEARCH_LOCAL
);
900 nbnxn_make_pairlist(nbv
->nbs
,nbv
->grp
[eintLocal
].nbat
,
903 nbv
->min_ci_balanced
,
904 &nbv
->grp
[eintLocal
].nbl_lists
,
906 nbv
->grp
[eintLocal
].kernel_type
,
908 wallcycle_sub_stop(wcycle
,ewcsNBS_SEARCH_LOCAL
);
912 /* initialize local pair-list on the GPU */
913 nbnxn_cuda_init_pairlist(nbv
->cu_nbv
,
914 nbv
->grp
[eintLocal
].nbl_lists
.nbl
[0],
917 wallcycle_stop(wcycle
, ewcNS
);
921 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
922 wallcycle_sub_start(wcycle
, ewcsNB_X_BUF_OPS
);
923 nbnxn_atomdata_copy_x_to_nbat_x(nbv
->nbs
,eatLocal
,FALSE
,x
,
924 nbv
->grp
[eintLocal
].nbat
);
925 wallcycle_sub_stop(wcycle
, ewcsNB_X_BUF_OPS
);
926 wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
931 wallcycle_start(wcycle
,ewcLAUNCH_GPU_NB
);
932 /* launch local nonbonded F on GPU */
933 do_nb_verlet(fr
, ic
, enerd
, flags
, eintLocal
, enbvClearFNo
,
935 wallcycle_stop(wcycle
,ewcLAUNCH_GPU_NB
);
938 /* Communicate coordinates and sum dipole if necessary +
939 do non-local pair search */
940 if (DOMAINDECOMP(cr
))
942 bDiffKernels
= (nbv
->grp
[eintNonlocal
].kernel_type
!=
943 nbv
->grp
[eintLocal
].kernel_type
);
947 /* With GPU+CPU non-bonded calculations we need to copy
948 * the local coordinates to the non-local nbat struct
949 * (in CPU format) as the non-local kernel call also
950 * calculates the local - non-local interactions.
952 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
953 wallcycle_sub_start(wcycle
, ewcsNB_X_BUF_OPS
);
954 nbnxn_atomdata_copy_x_to_nbat_x(nbv
->nbs
,eatLocal
,TRUE
,x
,
955 nbv
->grp
[eintNonlocal
].nbat
);
956 wallcycle_sub_stop(wcycle
, ewcsNB_X_BUF_OPS
);
957 wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
962 wallcycle_start_nocount(wcycle
,ewcNS
);
963 wallcycle_sub_start(wcycle
,ewcsNBS_SEARCH_NONLOCAL
);
967 nbnxn_grid_add_simple(nbv
->nbs
,nbv
->grp
[eintNonlocal
].nbat
);
970 nbnxn_make_pairlist(nbv
->nbs
,nbv
->grp
[eintNonlocal
].nbat
,
973 nbv
->min_ci_balanced
,
974 &nbv
->grp
[eintNonlocal
].nbl_lists
,
976 nbv
->grp
[eintNonlocal
].kernel_type
,
979 wallcycle_sub_stop(wcycle
,ewcsNBS_SEARCH_NONLOCAL
);
981 if (nbv
->grp
[eintNonlocal
].kernel_type
== nbk8x8x8_CUDA
)
983 /* initialize non-local pair-list on the GPU */
984 nbnxn_cuda_init_pairlist(nbv
->cu_nbv
,
985 nbv
->grp
[eintNonlocal
].nbl_lists
.nbl
[0],
988 wallcycle_stop(wcycle
,ewcNS
);
992 wallcycle_start(wcycle
,ewcMOVEX
);
993 dd_move_x(cr
->dd
,box
,x
);
995 /* When we don't need the total dipole we sum it in global_stat */
996 if (bStateChanged
&& NEED_MUTOT(*inputrec
))
998 gmx_sumd(2*DIM
,mu
,cr
);
1000 wallcycle_stop(wcycle
,ewcMOVEX
);
1002 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
1003 wallcycle_sub_start(wcycle
, ewcsNB_X_BUF_OPS
);
1004 nbnxn_atomdata_copy_x_to_nbat_x(nbv
->nbs
,eatNonlocal
,FALSE
,x
,
1005 nbv
->grp
[eintNonlocal
].nbat
);
1006 wallcycle_sub_stop(wcycle
, ewcsNB_X_BUF_OPS
);
1007 cycles_force
+= wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
1010 if (bUseGPU
&& !bDiffKernels
)
1012 wallcycle_start(wcycle
,ewcLAUNCH_GPU_NB
);
1013 /* launch non-local nonbonded F on GPU */
1014 do_nb_verlet(fr
, ic
, enerd
, flags
, eintNonlocal
, enbvClearFNo
,
1016 cycles_force
+= wallcycle_stop(wcycle
,ewcLAUNCH_GPU_NB
);
1022 /* launch D2H copy-back F */
1023 wallcycle_start_nocount(wcycle
, ewcLAUNCH_GPU_NB
);
1024 if (DOMAINDECOMP(cr
) && !bDiffKernels
)
1026 nbnxn_cuda_launch_cpyback(nbv
->cu_nbv
, nbv
->grp
[eintNonlocal
].nbat
,
1027 flags
, eatNonlocal
);
1029 nbnxn_cuda_launch_cpyback(nbv
->cu_nbv
, nbv
->grp
[eintLocal
].nbat
,
1031 cycles_force
+= wallcycle_stop(wcycle
,ewcLAUNCH_GPU_NB
);
1034 if (bStateChanged
&& NEED_MUTOT(*inputrec
))
1038 gmx_sumd(2*DIM
,mu
,cr
);
1045 fr
->mu_tot
[i
][j
] = mu
[i
*DIM
+ j
];
1049 if (fr
->efep
== efepNO
)
1051 copy_rvec(fr
->mu_tot
[0],mu_tot
);
1055 for(j
=0; j
<DIM
; j
++)
1058 (1.0 - lambda
[efptCOUL
])*fr
->mu_tot
[0][j
] +
1059 lambda
[efptCOUL
]*fr
->mu_tot
[1][j
];
1063 /* Reset energies */
1064 reset_enerdata(&(inputrec
->opts
),fr
,bNS
,enerd
,MASTER(cr
));
1065 clear_rvecs(SHIFTS
,fr
->fshift
);
1067 if (DOMAINDECOMP(cr
))
1069 if (!(cr
->duty
& DUTY_PME
))
1071 wallcycle_start(wcycle
,ewcPPDURINGPME
);
1072 dd_force_flop_start(cr
->dd
,nrnb
);
1076 /* Start the force cycle counter.
1077 * This counter is stopped in do_forcelow_level.
1078 * No parallel communication should occur while this counter is running,
1079 * since that will interfere with the dynamic load balancing.
1081 wallcycle_start(wcycle
,ewcFORCE
);
1084 /* Reset forces for which the virial is calculated separately:
1085 * PME/Ewald forces if necessary */
1086 if (fr
->bF_NoVirSum
)
1088 if (flags
& GMX_FORCE_VIRIAL
)
1090 fr
->f_novirsum
= fr
->f_novirsum_alloc
;
1091 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1094 clear_rvecs(fr
->f_novirsum_n
,fr
->f_novirsum
);
1098 clear_rvecs(homenr
,fr
->f_novirsum
+start
);
1100 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1104 /* We are not calculating the pressure so we do not need
1105 * a separate array for forces that do not contribute
1114 /* Add the long range forces to the short range forces */
1115 for(i
=0; i
<fr
->natoms_force_constr
; i
++)
1117 copy_rvec(fr
->f_twin
[i
],f
[i
]);
1120 else if (!(fr
->bTwinRange
&& bNS
))
1122 /* Clear the short-range forces */
1123 clear_rvecs(fr
->natoms_force_constr
,f
);
1126 clear_rvec(fr
->vir_diag_posres
);
1128 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1130 if (inputrec
->ePull
== epullCONSTRAINT
)
1132 clear_pull_forces(inputrec
->pull
);
1135 /* update QMMMrec, if necessary */
1138 update_QMMMrec(cr
,fr
,x
,mdatoms
,box
,top
);
1141 if ((flags
& GMX_FORCE_BONDED
) && top
->idef
.il
[F_POSRES
].nr
> 0)
1143 posres_wrapper(fplog
,flags
,bSepDVDL
,inputrec
,nrnb
,top
,box
,x
,
1147 /* Compute the bonded and non-bonded energies and optionally forces */
1148 /* if we use the GPU turn off the nonbonded */
1149 do_force_lowlevel(fplog
,step
,fr
,inputrec
,&(top
->idef
),
1150 cr
,nrnb
,wcycle
,mdatoms
,&(inputrec
->opts
),
1151 x
,hist
,f
,enerd
,fcd
,mtop
,top
,fr
->born
,
1152 &(top
->atomtypes
),bBornRadii
,box
,
1153 inputrec
->fepvals
,lambda
,graph
,&(top
->excls
),fr
->mu_tot
,
1154 ((nb_kernel_type
== nbk8x8x8_CUDA
|| nb_kernel_type
== nbk8x8x8_PlainC
)
1155 ? flags
&~GMX_FORCE_NONBONDED
: flags
),
1160 /* Maybe we should move this into do_force_lowlevel */
1161 do_nb_verlet(fr
, ic
, enerd
, flags
, eintLocal
, enbvClearFYes
,
1166 if (!bUseOrEmulGPU
|| bDiffKernels
)
1170 if (DOMAINDECOMP(cr
))
1172 do_nb_verlet(fr
, ic
, enerd
, flags
, eintNonlocal
,
1173 bDiffKernels
? enbvClearFYes
: enbvClearFNo
,
1183 aloc
= eintNonlocal
;
1186 /* Add all the non-bonded force to the normal force array.
1187 * This can be split into a local a non-local part when overlapping
1188 * communication with calculation with domain decomposition.
1190 cycles_force
+= wallcycle_stop(wcycle
,ewcFORCE
);
1191 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
1192 wallcycle_sub_start(wcycle
, ewcsNB_F_BUF_OPS
);
1193 nbnxn_atomdata_add_nbat_f_to_f(nbv
->nbs
,eatAll
,nbv
->grp
[aloc
].nbat
,f
);
1194 wallcycle_sub_stop(wcycle
, ewcsNB_F_BUF_OPS
);
1195 cycles_force
+= wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
1196 wallcycle_start_nocount(wcycle
,ewcFORCE
);
1198 /* if there are multiple fshift output buffers reduce them */
1199 if ((flags
& GMX_FORCE_VIRIAL
) &&
1200 nbv
->grp
[aloc
].nbl_lists
.nnbl
> 1)
1202 nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv
->grp
[aloc
].nbat
,
1207 cycles_force
+= wallcycle_stop(wcycle
,ewcFORCE
);
1208 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1212 do_flood(fplog
,cr
,x
,f
,ed
,box
,step
,bNS
);
1215 if (bUseOrEmulGPU
&& !bDiffKernels
)
1217 /* wait for non-local forces (or calculate in emulation mode) */
1218 if (DOMAINDECOMP(cr
))
1222 wallcycle_start(wcycle
,ewcWAIT_GPU_NB_NL
);
1223 nbnxn_cuda_wait_gpu(nbv
->cu_nbv
,
1224 nbv
->grp
[eintNonlocal
].nbat
,
1226 enerd
->grpp
.ener
[egLJSR
], enerd
->grpp
.ener
[egCOULSR
],
1228 cycles_force
+= wallcycle_stop(wcycle
,ewcWAIT_GPU_NB_NL
);
1232 wallcycle_start_nocount(wcycle
,ewcFORCE
);
1233 do_nb_verlet(fr
, ic
, enerd
, flags
, eintNonlocal
, enbvClearFYes
,
1235 cycles_force
+= wallcycle_stop(wcycle
,ewcFORCE
);
1237 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
1238 wallcycle_sub_start(wcycle
, ewcsNB_F_BUF_OPS
);
1239 /* skip the reduction if there was no non-local work to do */
1240 if (nbv
->grp
[eintLocal
].nbl_lists
.nbl
[0]->nsci
> 0)
1242 nbnxn_atomdata_add_nbat_f_to_f(nbv
->nbs
,eatNonlocal
,
1243 nbv
->grp
[eintNonlocal
].nbat
,f
);
1245 wallcycle_sub_stop(wcycle
, ewcsNB_F_BUF_OPS
);
1246 cycles_force
+= wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
1252 /* Communicate the forces */
1255 wallcycle_start(wcycle
,ewcMOVEF
);
1256 if (DOMAINDECOMP(cr
))
1258 dd_move_f(cr
->dd
,f
,fr
->fshift
);
1259 /* Do we need to communicate the separate force array
1260 * for terms that do not contribute to the single sum virial?
1261 * Position restraints and electric fields do not introduce
1262 * inter-cg forces, only full electrostatics methods do.
1263 * When we do not calculate the virial, fr->f_novirsum = f,
1264 * so we have already communicated these forces.
1266 if (EEL_FULL(fr
->eeltype
) && cr
->dd
->n_intercg_excl
&&
1267 (flags
& GMX_FORCE_VIRIAL
))
1269 dd_move_f(cr
->dd
,fr
->f_novirsum
,NULL
);
1273 /* We should not update the shift forces here,
1274 * since f_twin is already included in f.
1276 dd_move_f(cr
->dd
,fr
->f_twin
,NULL
);
1279 wallcycle_stop(wcycle
,ewcMOVEF
);
1285 /* wait for local forces (or calculate in emulation mode) */
1288 wallcycle_start(wcycle
,ewcWAIT_GPU_NB_L
);
1289 nbnxn_cuda_wait_gpu(nbv
->cu_nbv
,
1290 nbv
->grp
[eintLocal
].nbat
,
1292 enerd
->grpp
.ener
[egLJSR
], enerd
->grpp
.ener
[egCOULSR
],
1294 wallcycle_stop(wcycle
,ewcWAIT_GPU_NB_L
);
1296 /* now clear the GPU outputs while we finish the step on the CPU */
1297 nbnxn_cuda_clear_outputs(nbv
->cu_nbv
, flags
);
1301 wallcycle_start_nocount(wcycle
,ewcFORCE
);
1302 do_nb_verlet(fr
, ic
, enerd
, flags
, eintLocal
,
1303 DOMAINDECOMP(cr
) ? enbvClearFNo
: enbvClearFYes
,
1305 wallcycle_stop(wcycle
,ewcFORCE
);
1307 wallcycle_start(wcycle
, ewcNB_XF_BUF_OPS
);
1308 wallcycle_sub_start(wcycle
, ewcsNB_F_BUF_OPS
);
1309 if (nbv
->grp
[eintLocal
].nbl_lists
.nbl
[0]->nsci
> 0)
1311 /* skip the reduction if there was no non-local work to do */
1312 nbnxn_atomdata_add_nbat_f_to_f(nbv
->nbs
,eatLocal
,
1313 nbv
->grp
[eintLocal
].nbat
,f
);
1315 wallcycle_sub_stop(wcycle
, ewcsNB_F_BUF_OPS
);
1316 wallcycle_stop(wcycle
, ewcNB_XF_BUF_OPS
);
1319 if (DOMAINDECOMP(cr
))
1321 dd_force_flop_stop(cr
->dd
,nrnb
);
1324 dd_cycles_add(cr
->dd
,cycles_force
-cycles_pme
,ddCyclF
);
1330 if (IR_ELEC_FIELD(*inputrec
))
1332 /* Compute forces due to electric field */
1333 calc_f_el(MASTER(cr
) ? field
: NULL
,
1334 start
,homenr
,mdatoms
->chargeA
,x
,fr
->f_novirsum
,
1335 inputrec
->ex
,inputrec
->et
,t
);
1338 /* If we have NoVirSum forces, but we do not calculate the virial,
1339 * we sum fr->f_novirum=f later.
1341 if (vsite
&& !(fr
->bF_NoVirSum
&& !(flags
& GMX_FORCE_VIRIAL
)))
1343 wallcycle_start(wcycle
,ewcVSITESPREAD
);
1344 spread_vsite_f(fplog
,vsite
,x
,f
,fr
->fshift
,FALSE
,NULL
,nrnb
,
1345 &top
->idef
,fr
->ePBC
,fr
->bMolPBC
,graph
,box
,cr
);
1346 wallcycle_stop(wcycle
,ewcVSITESPREAD
);
1350 wallcycle_start(wcycle
,ewcVSITESPREAD
);
1351 spread_vsite_f(fplog
,vsite
,x
,fr
->f_twin
,NULL
,FALSE
,NULL
,
1353 &top
->idef
,fr
->ePBC
,fr
->bMolPBC
,graph
,box
,cr
);
1354 wallcycle_stop(wcycle
,ewcVSITESPREAD
);
1358 if (flags
& GMX_FORCE_VIRIAL
)
1360 /* Calculation of the virial must be done after vsites! */
1361 calc_virial(fplog
,mdatoms
->start
,mdatoms
->homenr
,x
,f
,
1362 vir_force
,graph
,box
,nrnb
,fr
,inputrec
->ePBC
);
1366 if (inputrec
->ePull
== epullUMBRELLA
|| inputrec
->ePull
== epullCONST_F
)
1368 pull_potential_wrapper(fplog
,bSepDVDL
,cr
,inputrec
,box
,x
,
1369 f
,vir_force
,mdatoms
,enerd
,lambda
,t
);
1372 if (PAR(cr
) && !(cr
->duty
& DUTY_PME
))
1374 /* In case of node-splitting, the PP nodes receive the long-range
1375 * forces, virial and energy from the PME nodes here.
1377 pme_receive_force_ener(fplog
,bSepDVDL
,cr
,wcycle
,enerd
,fr
);
1382 post_process_forces(fplog
,cr
,step
,nrnb
,wcycle
,
1383 top
,box
,x
,f
,vir_force
,mdatoms
,graph
,fr
,vsite
,
1387 /* Sum the potential energy terms from group contributions */
1388 sum_epot(&(inputrec
->opts
),enerd
);
1391 void do_force_cutsGROUP(FILE *fplog
,t_commrec
*cr
,
1392 t_inputrec
*inputrec
,
1393 gmx_large_int_t step
,t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
1394 gmx_localtop_t
*top
,
1396 gmx_groups_t
*groups
,
1397 matrix box
,rvec x
[],history_t
*hist
,
1401 gmx_enerdata_t
*enerd
,t_fcdata
*fcd
,
1402 real
*lambda
,t_graph
*graph
,
1403 t_forcerec
*fr
,gmx_vsite_t
*vsite
,rvec mu_tot
,
1404 double t
,FILE *field
,gmx_edsam_t ed
,
1405 gmx_bool bBornRadii
,
1411 gmx_bool bSepDVDL
,bStateChanged
,bNS
,bFillGrid
,bCalcCGCM
,bBS
;
1412 gmx_bool bDoLongRange
,bDoForces
,bSepLRF
;
1413 gmx_bool bDoAdressWF
;
1415 rvec vzero
,box_diag
;
1416 real e
,v
,dvdlambda
[efptNR
];
1418 float cycles_pme
,cycles_force
;
1420 start
= mdatoms
->start
;
1421 homenr
= mdatoms
->homenr
;
1423 bSepDVDL
= (fr
->bSepDVDL
&& do_per_step(step
,inputrec
->nstlog
));
1425 clear_mat(vir_force
);
1429 pd_cg_range(cr
,&cg0
,&cg1
);
1434 if (DOMAINDECOMP(cr
))
1436 cg1
= cr
->dd
->ncg_tot
;
1448 bStateChanged
= (flags
& GMX_FORCE_STATECHANGED
);
1449 bNS
= (flags
& GMX_FORCE_NS
) && (fr
->bAllvsAll
==FALSE
);
1450 bFillGrid
= (bNS
&& bStateChanged
);
1451 bCalcCGCM
= (bFillGrid
&& !DOMAINDECOMP(cr
));
1452 bDoLongRange
= (fr
->bTwinRange
&& bNS
&& (flags
& GMX_FORCE_DOLR
));
1453 bDoForces
= (flags
& GMX_FORCE_FORCES
);
1454 bSepLRF
= (bDoLongRange
&& bDoForces
&& (flags
& GMX_FORCE_SEPLRF
));
1455 /* should probably move this to the forcerec since it doesn't change */
1456 bDoAdressWF
= ((fr
->adress_type
!=eAdressOff
));
1460 update_forcerec(fplog
,fr
,box
);
1462 if (NEED_MUTOT(*inputrec
))
1464 /* Calculate total (local) dipole moment in a temporary common array.
1465 * This makes it possible to sum them over nodes faster.
1467 calc_mu(start
,homenr
,
1468 x
,mdatoms
->chargeA
,mdatoms
->chargeB
,mdatoms
->nChargePerturbed
,
1473 if (fr
->ePBC
!= epbcNONE
) {
1474 /* Compute shift vectors every step,
1475 * because of pressure coupling or box deformation!
1477 if ((flags
& GMX_FORCE_DYNAMICBOX
) && bStateChanged
)
1478 calc_shifts(box
,fr
->shift_vec
);
1481 put_charge_groups_in_box(fplog
,cg0
,cg1
,fr
->ePBC
,box
,
1482 &(top
->cgs
),x
,fr
->cg_cm
);
1483 inc_nrnb(nrnb
,eNR_CGCM
,homenr
);
1484 inc_nrnb(nrnb
,eNR_RESETX
,cg1
-cg0
);
1486 else if (EI_ENERGY_MINIMIZATION(inputrec
->eI
) && graph
) {
1487 unshift_self(graph
,box
,x
);
1490 else if (bCalcCGCM
) {
1491 calc_cgcm(fplog
,cg0
,cg1
,&(top
->cgs
),x
,fr
->cg_cm
);
1492 inc_nrnb(nrnb
,eNR_CGCM
,homenr
);
1497 move_cgcm(fplog
,cr
,fr
->cg_cm
);
1500 pr_rvecs(debug
,0,"cgcm",fr
->cg_cm
,top
->cgs
.nr
);
1504 if (!(cr
->duty
& DUTY_PME
)) {
1505 /* Send particle coordinates to the pme nodes.
1506 * Since this is only implemented for domain decomposition
1507 * and domain decomposition does not use the graph,
1508 * we do not need to worry about shifting.
1511 wallcycle_start(wcycle
,ewcPP_PMESENDX
);
1512 GMX_MPE_LOG(ev_send_coordinates_start
);
1514 bBS
= (inputrec
->nwall
== 2);
1517 svmul(inputrec
->wall_ewald_zfac
,boxs
[ZZ
],boxs
[ZZ
]);
1520 gmx_pme_send_x(cr
,bBS
? boxs
: box
,x
,
1521 mdatoms
->nChargePerturbed
,lambda
[efptCOUL
],
1522 (flags
& (GMX_FORCE_VIRIAL
| GMX_FORCE_ENERGY
)),step
);
1524 GMX_MPE_LOG(ev_send_coordinates_finish
);
1525 wallcycle_stop(wcycle
,ewcPP_PMESENDX
);
1527 #endif /* GMX_MPI */
1529 /* Communicate coordinates and sum dipole if necessary */
1532 wallcycle_start(wcycle
,ewcMOVEX
);
1533 if (DOMAINDECOMP(cr
))
1535 dd_move_x(cr
->dd
,box
,x
);
1539 move_x(fplog
,cr
,GMX_LEFT
,GMX_RIGHT
,x
,nrnb
);
1541 wallcycle_stop(wcycle
,ewcMOVEX
);
1544 /* update adress weight beforehand */
1545 if(bStateChanged
&& bDoAdressWF
)
1547 /* need pbc for adress weight calculation with pbc_dx */
1548 set_pbc(&pbc
,inputrec
->ePBC
,box
);
1549 if(fr
->adress_site
== eAdressSITEcog
)
1551 update_adress_weights_cog(top
->idef
.iparams
,top
->idef
.il
,x
,fr
,mdatoms
,
1552 inputrec
->ePBC
==epbcNONE
? NULL
: &pbc
);
1554 else if (fr
->adress_site
== eAdressSITEcom
)
1556 update_adress_weights_com(fplog
,cg0
,cg1
,&(top
->cgs
),x
,fr
,mdatoms
,
1557 inputrec
->ePBC
==epbcNONE
? NULL
: &pbc
);
1559 else if (fr
->adress_site
== eAdressSITEatomatom
){
1560 update_adress_weights_atom_per_atom(cg0
,cg1
,&(top
->cgs
),x
,fr
,mdatoms
,
1561 inputrec
->ePBC
==epbcNONE
? NULL
: &pbc
);
1565 update_adress_weights_atom(cg0
,cg1
,&(top
->cgs
),x
,fr
,mdatoms
,
1566 inputrec
->ePBC
==epbcNONE
? NULL
: &pbc
);
1570 if (NEED_MUTOT(*inputrec
))
1577 gmx_sumd(2*DIM
,mu
,cr
);
1583 fr
->mu_tot
[i
][j
] = mu
[i
*DIM
+ j
];
1587 if (fr
->efep
== efepNO
)
1589 copy_rvec(fr
->mu_tot
[0],mu_tot
);
1593 for(j
=0; j
<DIM
; j
++)
1596 (1.0 - lambda
[efptCOUL
])*fr
->mu_tot
[0][j
] + lambda
[efptCOUL
]*fr
->mu_tot
[1][j
];
1601 /* Reset energies */
1602 reset_enerdata(&(inputrec
->opts
),fr
,bNS
,enerd
,MASTER(cr
));
1603 clear_rvecs(SHIFTS
,fr
->fshift
);
1607 wallcycle_start(wcycle
,ewcNS
);
1609 if (graph
&& bStateChanged
)
1611 /* Calculate intramolecular shift vectors to make molecules whole */
1612 mk_mshift(fplog
,graph
,fr
->ePBC
,box
,x
);
1615 /* Reset long range forces if necessary */
1618 /* Reset the (long-range) forces if necessary */
1619 clear_rvecs(fr
->natoms_force_constr
,bSepLRF
? fr
->f_twin
: f
);
1622 /* Do the actual neighbour searching and if twin range electrostatics
1623 * also do the calculation of long range forces and energies.
1625 for (i
=0;i
<efptNR
;i
++) {dvdlambda
[i
] = 0;}
1627 groups
,&(inputrec
->opts
),top
,mdatoms
,
1628 cr
,nrnb
,lambda
,dvdlambda
,&enerd
->grpp
,bFillGrid
,
1629 bDoLongRange
,bDoForces
,bSepLRF
? fr
->f_twin
: f
);
1632 fprintf(fplog
,sepdvdlformat
,"LR non-bonded",0.0,dvdlambda
);
1634 enerd
->dvdl_lin
[efptVDW
] += dvdlambda
[efptVDW
];
1635 enerd
->dvdl_lin
[efptCOUL
] += dvdlambda
[efptCOUL
];
1637 wallcycle_stop(wcycle
,ewcNS
);
1640 if (inputrec
->implicit_solvent
&& bNS
)
1642 make_gb_nblist(cr
,inputrec
->gb_algorithm
,inputrec
->rlist
,
1643 x
,box
,fr
,&top
->idef
,graph
,fr
->born
);
1646 if (DOMAINDECOMP(cr
))
1648 if (!(cr
->duty
& DUTY_PME
))
1650 wallcycle_start(wcycle
,ewcPPDURINGPME
);
1651 dd_force_flop_start(cr
->dd
,nrnb
);
1657 /* Enforced rotation has its own cycle counter that starts after the collective
1658 * coordinates have been communicated. It is added to ddCyclF to allow
1659 * for proper load-balancing */
1660 wallcycle_start(wcycle
,ewcROT
);
1661 do_rotation(cr
,inputrec
,box
,x
,t
,step
,wcycle
,bNS
);
1662 wallcycle_stop(wcycle
,ewcROT
);
1665 /* Start the force cycle counter.
1666 * This counter is stopped in do_forcelow_level.
1667 * No parallel communication should occur while this counter is running,
1668 * since that will interfere with the dynamic load balancing.
1670 wallcycle_start(wcycle
,ewcFORCE
);
1674 /* Reset forces for which the virial is calculated separately:
1675 * PME/Ewald forces if necessary */
1676 if (fr
->bF_NoVirSum
)
1678 if (flags
& GMX_FORCE_VIRIAL
)
1680 fr
->f_novirsum
= fr
->f_novirsum_alloc
;
1681 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1684 clear_rvecs(fr
->f_novirsum_n
,fr
->f_novirsum
);
1688 clear_rvecs(homenr
,fr
->f_novirsum
+start
);
1690 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1694 /* We are not calculating the pressure so we do not need
1695 * a separate array for forces that do not contribute
1704 /* Add the long range forces to the short range forces */
1705 for(i
=0; i
<fr
->natoms_force_constr
; i
++)
1707 copy_rvec(fr
->f_twin
[i
],f
[i
]);
1710 else if (!(fr
->bTwinRange
&& bNS
))
1712 /* Clear the short-range forces */
1713 clear_rvecs(fr
->natoms_force_constr
,f
);
1716 clear_rvec(fr
->vir_diag_posres
);
1718 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1720 if (inputrec
->ePull
== epullCONSTRAINT
)
1722 clear_pull_forces(inputrec
->pull
);
1725 /* update QMMMrec, if necessary */
1728 update_QMMMrec(cr
,fr
,x
,mdatoms
,box
,top
);
1731 if ((flags
& GMX_FORCE_BONDED
) && top
->idef
.il
[F_POSRES
].nr
> 0)
1733 posres_wrapper(fplog
,flags
,bSepDVDL
,inputrec
,nrnb
,top
,box
,x
,
1737 /* Compute the bonded and non-bonded energies and optionally forces */
1738 do_force_lowlevel(fplog
,step
,fr
,inputrec
,&(top
->idef
),
1739 cr
,nrnb
,wcycle
,mdatoms
,&(inputrec
->opts
),
1740 x
,hist
,f
,enerd
,fcd
,mtop
,top
,fr
->born
,
1741 &(top
->atomtypes
),bBornRadii
,box
,
1742 inputrec
->fepvals
,lambda
,
1743 graph
,&(top
->excls
),fr
->mu_tot
,
1747 cycles_force
= wallcycle_stop(wcycle
,ewcFORCE
);
1748 GMX_BARRIER(cr
->mpi_comm_mygroup
);
1752 do_flood(fplog
,cr
,x
,f
,ed
,box
,step
,bNS
);
1755 if (DOMAINDECOMP(cr
))
1757 dd_force_flop_stop(cr
->dd
,nrnb
);
1760 dd_cycles_add(cr
->dd
,cycles_force
-cycles_pme
,ddCyclF
);
1766 if (IR_ELEC_FIELD(*inputrec
))
1768 /* Compute forces due to electric field */
1769 calc_f_el(MASTER(cr
) ? field
: NULL
,
1770 start
,homenr
,mdatoms
->chargeA
,x
,fr
->f_novirsum
,
1771 inputrec
->ex
,inputrec
->et
,t
);
1774 if (bDoAdressWF
&& fr
->adress_icor
== eAdressICThermoForce
)
1776 /* Compute thermodynamic force in hybrid AdResS region */
1777 adress_thermo_force(start
,homenr
,&(top
->cgs
),x
,fr
->f_novirsum
,fr
,mdatoms
,
1778 inputrec
->ePBC
==epbcNONE
? NULL
: &pbc
);
1781 /* Communicate the forces */
1784 wallcycle_start(wcycle
,ewcMOVEF
);
1785 if (DOMAINDECOMP(cr
))
1787 dd_move_f(cr
->dd
,f
,fr
->fshift
);
1788 /* Do we need to communicate the separate force array
1789 * for terms that do not contribute to the single sum virial?
1790 * Position restraints and electric fields do not introduce
1791 * inter-cg forces, only full electrostatics methods do.
1792 * When we do not calculate the virial, fr->f_novirsum = f,
1793 * so we have already communicated these forces.
1795 if (EEL_FULL(fr
->eeltype
) && cr
->dd
->n_intercg_excl
&&
1796 (flags
& GMX_FORCE_VIRIAL
))
1798 dd_move_f(cr
->dd
,fr
->f_novirsum
,NULL
);
1802 /* We should not update the shift forces here,
1803 * since f_twin is already included in f.
1805 dd_move_f(cr
->dd
,fr
->f_twin
,NULL
);
1810 pd_move_f(cr
,f
,nrnb
);
1813 pd_move_f(cr
,fr
->f_twin
,nrnb
);
1816 wallcycle_stop(wcycle
,ewcMOVEF
);
1819 /* If we have NoVirSum forces, but we do not calculate the virial,
1820 * we sum fr->f_novirum=f later.
1822 if (vsite
&& !(fr
->bF_NoVirSum
&& !(flags
& GMX_FORCE_VIRIAL
)))
1824 wallcycle_start(wcycle
,ewcVSITESPREAD
);
1825 spread_vsite_f(fplog
,vsite
,x
,f
,fr
->fshift
,FALSE
,NULL
,nrnb
,
1826 &top
->idef
,fr
->ePBC
,fr
->bMolPBC
,graph
,box
,cr
);
1827 wallcycle_stop(wcycle
,ewcVSITESPREAD
);
1831 wallcycle_start(wcycle
,ewcVSITESPREAD
);
1832 spread_vsite_f(fplog
,vsite
,x
,fr
->f_twin
,NULL
,FALSE
,NULL
,
1834 &top
->idef
,fr
->ePBC
,fr
->bMolPBC
,graph
,box
,cr
);
1835 wallcycle_stop(wcycle
,ewcVSITESPREAD
);
1839 if (flags
& GMX_FORCE_VIRIAL
)
1841 /* Calculation of the virial must be done after vsites! */
1842 calc_virial(fplog
,mdatoms
->start
,mdatoms
->homenr
,x
,f
,
1843 vir_force
,graph
,box
,nrnb
,fr
,inputrec
->ePBC
);
1847 if (inputrec
->ePull
== epullUMBRELLA
|| inputrec
->ePull
== epullCONST_F
)
1849 pull_potential_wrapper(fplog
,bSepDVDL
,cr
,inputrec
,box
,x
,
1850 f
,vir_force
,mdatoms
,enerd
,lambda
,t
);
1853 /* Add the forces from enforced rotation potentials (if any) */
1856 wallcycle_start(wcycle
,ewcROTadd
);
1857 enerd
->term
[F_COM_PULL
] += add_rot_forces(inputrec
->rot
, f
, cr
,step
,t
);
1858 wallcycle_stop(wcycle
,ewcROTadd
);
1861 if (PAR(cr
) && !(cr
->duty
& DUTY_PME
))
1863 /* In case of node-splitting, the PP nodes receive the long-range
1864 * forces, virial and energy from the PME nodes here.
1866 pme_receive_force_ener(fplog
,bSepDVDL
,cr
,wcycle
,enerd
,fr
);
1871 post_process_forces(fplog
,cr
,step
,nrnb
,wcycle
,
1872 top
,box
,x
,f
,vir_force
,mdatoms
,graph
,fr
,vsite
,
1876 /* Sum the potential energy terms from group contributions */
1877 sum_epot(&(inputrec
->opts
),enerd
);
1880 void do_force(FILE *fplog
,t_commrec
*cr
,
1881 t_inputrec
*inputrec
,
1882 gmx_large_int_t step
,t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
1883 gmx_localtop_t
*top
,
1885 gmx_groups_t
*groups
,
1886 matrix box
,rvec x
[],history_t
*hist
,
1890 gmx_enerdata_t
*enerd
,t_fcdata
*fcd
,
1891 real
*lambda
,t_graph
*graph
,
1893 gmx_vsite_t
*vsite
,rvec mu_tot
,
1894 double t
,FILE *field
,gmx_edsam_t ed
,
1895 gmx_bool bBornRadii
,
1898 /* modify force flag if not doing nonbonded */
1899 if (!fr
->bNonbonded
)
1901 flags
&= ~GMX_FORCE_NONBONDED
;
1904 switch (inputrec
->cutoff_scheme
)
1907 do_force_cutsVERLET(fplog
, cr
, inputrec
,
1923 do_force_cutsGROUP(fplog
, cr
, inputrec
,
1938 gmx_incons("Invalid cut-off scheme passed!");
1943 void do_constrain_first(FILE *fplog
,gmx_constr_t constr
,
1944 t_inputrec
*ir
,t_mdatoms
*md
,
1945 t_state
*state
,rvec
*f
,
1946 t_graph
*graph
,t_commrec
*cr
,t_nrnb
*nrnb
,
1947 t_forcerec
*fr
, gmx_localtop_t
*top
, tensor shake_vir
)
1950 gmx_large_int_t step
;
1951 real dt
=ir
->delta_t
;
1955 snew(savex
,state
->natoms
);
1958 end
= md
->homenr
+ start
;
1961 fprintf(debug
,"vcm: start=%d, homenr=%d, end=%d\n",
1962 start
,md
->homenr
,end
);
1963 /* Do a first constrain to reset particles... */
1964 step
= ir
->init_step
;
1967 char buf
[STEPSTRSIZE
];
1968 fprintf(fplog
,"\nConstraining the starting coordinates (step %s)\n",
1969 gmx_step_str(step
,buf
));
1973 /* constrain the current position */
1974 constrain(NULL
,TRUE
,FALSE
,constr
,&(top
->idef
),
1975 ir
,NULL
,cr
,step
,0,md
,
1976 state
->x
,state
->x
,NULL
,
1977 fr
->bMolPBC
,state
->box
,
1978 state
->lambda
[efptBONDED
],&dvdl_dum
,
1979 NULL
,NULL
,nrnb
,econqCoord
,
1980 ir
->epc
==epcMTTK
,state
->veta
,state
->veta
);
1983 /* constrain the inital velocity, and save it */
1984 /* also may be useful if we need the ekin from the halfstep for velocity verlet */
1985 /* might not yet treat veta correctly */
1986 constrain(NULL
,TRUE
,FALSE
,constr
,&(top
->idef
),
1987 ir
,NULL
,cr
,step
,0,md
,
1988 state
->x
,state
->v
,state
->v
,
1989 fr
->bMolPBC
,state
->box
,
1990 state
->lambda
[efptBONDED
],&dvdl_dum
,
1991 NULL
,NULL
,nrnb
,econqVeloc
,
1992 ir
->epc
==epcMTTK
,state
->veta
,state
->veta
);
1994 /* constrain the inital velocities at t-dt/2 */
1995 if (EI_STATE_VELOCITY(ir
->eI
) && ir
->eI
!=eiVV
)
1997 for(i
=start
; (i
<end
); i
++)
1999 for(m
=0; (m
<DIM
); m
++)
2001 /* Reverse the velocity */
2002 state
->v
[i
][m
] = -state
->v
[i
][m
];
2003 /* Store the position at t-dt in buf */
2004 savex
[i
][m
] = state
->x
[i
][m
] + dt
*state
->v
[i
][m
];
2007 /* Shake the positions at t=-dt with the positions at t=0
2008 * as reference coordinates.
2012 char buf
[STEPSTRSIZE
];
2013 fprintf(fplog
,"\nConstraining the coordinates at t0-dt (step %s)\n",
2014 gmx_step_str(step
,buf
));
2017 constrain(NULL
,TRUE
,FALSE
,constr
,&(top
->idef
),
2018 ir
,NULL
,cr
,step
,-1,md
,
2019 state
->x
,savex
,NULL
,
2020 fr
->bMolPBC
,state
->box
,
2021 state
->lambda
[efptBONDED
],&dvdl_dum
,
2022 state
->v
,NULL
,nrnb
,econqCoord
,
2023 ir
->epc
==epcMTTK
,state
->veta
,state
->veta
);
2025 for(i
=start
; i
<end
; i
++) {
2026 for(m
=0; m
<DIM
; m
++) {
2027 /* Re-reverse the velocities */
2028 state
->v
[i
][m
] = -state
->v
[i
][m
];
2035 void calc_enervirdiff(FILE *fplog
,int eDispCorr
,t_forcerec
*fr
)
2037 double eners
[2],virs
[2],enersum
,virsum
,y0
,f
,g
,h
;
2038 double r0
,r1
,r
,rc3
,rc9
,ea
,eb
,ec
,pa
,pb
,pc
,pd
;
2039 double invscale
,invscale2
,invscale3
;
2040 int ri0
,ri1
,ri
,i
,offstart
,offset
;
2043 fr
->enershiftsix
= 0;
2044 fr
->enershifttwelve
= 0;
2045 fr
->enerdiffsix
= 0;
2046 fr
->enerdifftwelve
= 0;
2048 fr
->virdifftwelve
= 0;
2050 if (eDispCorr
!= edispcNO
) {
2051 for(i
=0; i
<2; i
++) {
2055 if ((fr
->vdwtype
== evdwSWITCH
) || (fr
->vdwtype
== evdwSHIFT
)) {
2056 if (fr
->rvdw_switch
== 0)
2058 "With dispersion correction rvdw-switch can not be zero "
2059 "for vdw-type = %s",evdw_names
[fr
->vdwtype
]);
2061 scale
= fr
->nblists
[0].tab
.scale
;
2062 vdwtab
= fr
->nblists
[0].vdwtab
;
2064 /* Round the cut-offs to exact table values for precision */
2065 ri0
= floor(fr
->rvdw_switch
*scale
);
2066 ri1
= ceil(fr
->rvdw
*scale
);
2072 if (fr
->vdwtype
== evdwSHIFT
) {
2073 /* Determine the constant energy shift below rvdw_switch */
2074 fr
->enershiftsix
= (real
)(-1.0/(rc3
*rc3
)) - vdwtab
[8*ri0
];
2075 fr
->enershifttwelve
= (real
)( 1.0/(rc9
*rc3
)) - vdwtab
[8*ri0
+ 4];
2077 /* Add the constant part from 0 to rvdw_switch.
2078 * This integration from 0 to rvdw_switch overcounts the number
2079 * of interactions by 1, as it also counts the self interaction.
2080 * We will correct for this later.
2082 eners
[0] += 4.0*M_PI
*fr
->enershiftsix
*rc3
/3.0;
2083 eners
[1] += 4.0*M_PI
*fr
->enershifttwelve
*rc3
/3.0;
2085 invscale
= 1.0/(scale
);
2086 invscale2
= invscale
*invscale
;
2087 invscale3
= invscale
*invscale2
;
2089 /* following summation derived from cubic spline definition,
2090 Numerical Recipies in C, second edition, p. 113-116. Exact
2091 for the cubic spline. We first calculate the negative of
2092 the energy from rvdw to rvdw_switch, assuming that g(r)=1,
2093 and then add the more standard, abrupt cutoff correction to
2094 that result, yielding the long-range correction for a
2095 switched function. We perform both the pressure and energy
2096 loops at the same time for simplicity, as the computational
2100 enersum
= 0.0; virsum
= 0.0;
2105 for (ri
=ri0
; ri
<ri1
; ri
++) {
2108 eb
= 2.0*invscale2
*r
;
2112 pb
= 3.0*invscale2
*r
;
2113 pc
= 3.0*invscale
*r
*r
;
2116 /* this "8" is from the packing in the vdwtab array - perhaps
2117 should be #define'ed? */
2118 offset
= 8*ri
+ offstart
;
2119 y0
= vdwtab
[offset
];
2120 f
= vdwtab
[offset
+1];
2121 g
= vdwtab
[offset
+2];
2122 h
= vdwtab
[offset
+3];
2124 enersum
+= y0
*(ea
/3 + eb
/2 + ec
) + f
*(ea
/4 + eb
/3 + ec
/2)+
2125 g
*(ea
/5 + eb
/4 + ec
/3) + h
*(ea
/6 + eb
/5 + ec
/4);
2126 virsum
+= f
*(pa
/4 + pb
/3 + pc
/2 + pd
) +
2127 2*g
*(pa
/5 + pb
/4 + pc
/3 + pd
/2) + 3*h
*(pa
/6 + pb
/5 + pc
/4 + pd
/3);
2130 enersum
*= 4.0*M_PI
;
2132 eners
[i
] -= enersum
;
2136 /* now add the correction for rvdw_switch to infinity */
2137 eners
[0] += -4.0*M_PI
/(3.0*rc3
);
2138 eners
[1] += 4.0*M_PI
/(9.0*rc9
);
2139 virs
[0] += 8.0*M_PI
/rc3
;
2140 virs
[1] += -16.0*M_PI
/(3.0*rc9
);
2142 else if ((fr
->vdwtype
== evdwCUT
) || (fr
->vdwtype
== evdwUSER
)) {
2143 if (fr
->vdwtype
== evdwUSER
&& fplog
)
2145 "WARNING: using dispersion correction with user tables\n");
2146 rc3
= fr
->rvdw
*fr
->rvdw
*fr
->rvdw
;
2148 /* Contribution beyond the cut-off */
2149 eners
[0] += -4.0*M_PI
/(3.0*rc3
);
2150 eners
[1] += 4.0*M_PI
/(9.0*rc9
);
2151 if (fr
->cutoff_scheme
== ecutsVERLET
&& fr
->ic
->sh_invrc6
!= 0) {
2152 /* Contribution within the cut-off */
2153 eners
[0] += -4.0*M_PI
/(3.0*rc3
);
2154 eners
[1] += 4.0*M_PI
/(3.0*rc9
);
2156 /* Contribution beyond the cut-off */
2157 virs
[0] += 8.0*M_PI
/rc3
;
2158 virs
[1] += -16.0*M_PI
/(3.0*rc9
);
2161 "Dispersion correction is not implemented for vdw-type = %s",
2162 evdw_names
[fr
->vdwtype
]);
2164 fr
->enerdiffsix
= eners
[0];
2165 fr
->enerdifftwelve
= eners
[1];
2166 /* The 0.5 is due to the Gromacs definition of the virial */
2167 fr
->virdiffsix
= 0.5*virs
[0];
2168 fr
->virdifftwelve
= 0.5*virs
[1];
2172 void calc_dispcorr(FILE *fplog
,t_inputrec
*ir
,t_forcerec
*fr
,
2173 gmx_large_int_t step
,int natoms
,
2174 matrix box
,real lambda
,tensor pres
,tensor virial
,
2175 real
*prescorr
, real
*enercorr
, real
*dvdlcorr
)
2177 gmx_bool bCorrAll
,bCorrPres
;
2178 real dvdlambda
,invvol
,dens
,ninter
,avcsix
,avctwelve
,enerdiff
,svir
=0,spres
=0;
2188 if (ir
->eDispCorr
!= edispcNO
) {
2189 bCorrAll
= (ir
->eDispCorr
== edispcAllEner
||
2190 ir
->eDispCorr
== edispcAllEnerPres
);
2191 bCorrPres
= (ir
->eDispCorr
== edispcEnerPres
||
2192 ir
->eDispCorr
== edispcAllEnerPres
);
2194 invvol
= 1/det(box
);
2197 /* Only correct for the interactions with the inserted molecule */
2198 dens
= (natoms
- fr
->n_tpi
)*invvol
;
2203 dens
= natoms
*invvol
;
2204 ninter
= 0.5*natoms
;
2207 if (ir
->efep
== efepNO
)
2209 avcsix
= fr
->avcsix
[0];
2210 avctwelve
= fr
->avctwelve
[0];
2214 avcsix
= (1 - lambda
)*fr
->avcsix
[0] + lambda
*fr
->avcsix
[1];
2215 avctwelve
= (1 - lambda
)*fr
->avctwelve
[0] + lambda
*fr
->avctwelve
[1];
2218 enerdiff
= ninter
*(dens
*fr
->enerdiffsix
- fr
->enershiftsix
);
2219 *enercorr
+= avcsix
*enerdiff
;
2221 if (ir
->efep
!= efepNO
)
2223 dvdlambda
+= (fr
->avcsix
[1] - fr
->avcsix
[0])*enerdiff
;
2227 enerdiff
= ninter
*(dens
*fr
->enerdifftwelve
- fr
->enershifttwelve
);
2228 *enercorr
+= avctwelve
*enerdiff
;
2229 if (fr
->efep
!= efepNO
)
2231 dvdlambda
+= (fr
->avctwelve
[1] - fr
->avctwelve
[0])*enerdiff
;
2237 svir
= ninter
*dens
*avcsix
*fr
->virdiffsix
/3.0;
2238 if (ir
->eDispCorr
== edispcAllEnerPres
)
2240 svir
+= ninter
*dens
*avctwelve
*fr
->virdifftwelve
/3.0;
2242 /* The factor 2 is because of the Gromacs virial definition */
2243 spres
= -2.0*invvol
*svir
*PRESFAC
;
2245 for(m
=0; m
<DIM
; m
++) {
2246 virial
[m
][m
] += svir
;
2247 pres
[m
][m
] += spres
;
2252 /* Can't currently control when it prints, for now, just print when degugging */
2256 fprintf(debug
,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
2262 "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
2263 *enercorr
,spres
,svir
);
2267 fprintf(debug
,"Long Range LJ corr.: Epot %10g\n",*enercorr
);
2271 if (fr
->bSepDVDL
&& do_per_step(step
,ir
->nstlog
))
2273 fprintf(fplog
,sepdvdlformat
,"Dispersion correction",
2274 *enercorr
,dvdlambda
);
2276 if (fr
->efep
!= efepNO
)
2278 *dvdlcorr
+= dvdlambda
;
2283 void do_pbc_first(FILE *fplog
,matrix box
,t_forcerec
*fr
,
2284 t_graph
*graph
,rvec x
[])
2287 fprintf(fplog
,"Removing pbc first time\n");
2288 calc_shifts(box
,fr
->shift_vec
);
2290 mk_mshift(fplog
,graph
,fr
->ePBC
,box
,x
);
2292 p_graph(debug
,"do_pbc_first 1",graph
);
2293 shift_self(graph
,box
,x
);
2294 /* By doing an extra mk_mshift the molecules that are broken
2295 * because they were e.g. imported from another software
2296 * will be made whole again. Such are the healing powers
2299 mk_mshift(fplog
,graph
,fr
->ePBC
,box
,x
);
2301 p_graph(debug
,"do_pbc_first 2",graph
);
2304 fprintf(fplog
,"Done rmpbc\n");
2307 static void low_do_pbc_mtop(FILE *fplog
,int ePBC
,matrix box
,
2308 gmx_mtop_t
*mtop
,rvec x
[],
2313 gmx_molblock_t
*molb
;
2315 if (bFirst
&& fplog
)
2316 fprintf(fplog
,"Removing pbc first time\n");
2320 for(mb
=0; mb
<mtop
->nmolblock
; mb
++) {
2321 molb
= &mtop
->molblock
[mb
];
2322 if (molb
->natoms_mol
== 1 ||
2323 (!bFirst
&& mtop
->moltype
[molb
->type
].cgs
.nr
== 1)) {
2324 /* Just one atom or charge group in the molecule, no PBC required */
2325 as
+= molb
->nmol
*molb
->natoms_mol
;
2327 /* Pass NULL iso fplog to avoid graph prints for each molecule type */
2328 mk_graph_ilist(NULL
,mtop
->moltype
[molb
->type
].ilist
,
2329 0,molb
->natoms_mol
,FALSE
,FALSE
,graph
);
2331 for(mol
=0; mol
<molb
->nmol
; mol
++) {
2332 mk_mshift(fplog
,graph
,ePBC
,box
,x
+as
);
2334 shift_self(graph
,box
,x
+as
);
2335 /* The molecule is whole now.
2336 * We don't need the second mk_mshift call as in do_pbc_first,
2337 * since we no longer need this graph.
2340 as
+= molb
->natoms_mol
;
2348 void do_pbc_first_mtop(FILE *fplog
,int ePBC
,matrix box
,
2349 gmx_mtop_t
*mtop
,rvec x
[])
2351 low_do_pbc_mtop(fplog
,ePBC
,box
,mtop
,x
,TRUE
);
2354 void do_pbc_mtop(FILE *fplog
,int ePBC
,matrix box
,
2355 gmx_mtop_t
*mtop
,rvec x
[])
2357 low_do_pbc_mtop(fplog
,ePBC
,box
,mtop
,x
,FALSE
);
2360 void finish_run(FILE *fplog
,t_commrec
*cr
,const char *confout
,
2361 t_inputrec
*inputrec
,
2362 t_nrnb nrnb
[],gmx_wallcycle_t wcycle
,
2363 gmx_runtime_t
*runtime
,
2364 wallclock_gpu_t
*gputimes
,
2366 gmx_bool bWriteStat
)
2369 t_nrnb
*nrnb_tot
=NULL
;
2373 wallcycle_sum(cr
,wcycle
);
2379 MPI_Allreduce(nrnb
->n
,nrnb_tot
->n
,eNRNB
,MPI_DOUBLE
,MPI_SUM
,
2380 cr
->mpi_comm_mysim
);
2388 #if defined(GMX_MPI) && !defined(GMX_THREAD_MPI)
2391 /* reduce nodetime over all MPI processes in the current simulation */
2393 MPI_Allreduce(&runtime
->proctime
,&sum
,1,MPI_DOUBLE
,MPI_SUM
,
2394 cr
->mpi_comm_mysim
);
2395 runtime
->proctime
= sum
;
2401 print_flop(fplog
,nrnb_tot
,&nbfs
,&mflop
);
2408 if ((cr
->duty
& DUTY_PP
) && DOMAINDECOMP(cr
))
2410 print_dd_statistics(cr
,inputrec
,fplog
);
2422 snew(nrnb_all
,cr
->nnodes
);
2423 nrnb_all
[0] = *nrnb
;
2424 for(s
=1; s
<cr
->nnodes
; s
++)
2426 MPI_Recv(nrnb_all
[s
].n
,eNRNB
,MPI_DOUBLE
,s
,0,
2427 cr
->mpi_comm_mysim
,&stat
);
2429 pr_load(fplog
,cr
,nrnb_all
);
2434 MPI_Send(nrnb
->n
,eNRNB
,MPI_DOUBLE
,MASTERRANK(cr
),0,
2435 cr
->mpi_comm_mysim
);
2442 wallcycle_print(fplog
,cr
->nnodes
,cr
->npmenodes
,runtime
->realtime
,
2445 if (EI_DYNAMICS(inputrec
->eI
))
2447 delta_t
= inputrec
->delta_t
;
2456 print_perf(fplog
,runtime
->proctime
,runtime
->realtime
,
2457 cr
->nnodes
-cr
->npmenodes
,
2458 runtime
->nsteps_done
,delta_t
,nbfs
,mflop
,
2463 print_perf(stderr
,runtime
->proctime
,runtime
->realtime
,
2464 cr
->nnodes
-cr
->npmenodes
,
2465 runtime
->nsteps_done
,delta_t
,nbfs
,mflop
,
2471 extern void initialize_lambdas(FILE *fplog
,t_inputrec
*ir
,int *fep_state
,real
*lambda
,double *lam0
)
2473 /* this function works, but could probably use a logic rewrite to keep all the different
2474 types of efep straight. */
2477 t_lambda
*fep
= ir
->fepvals
;
2479 if ((ir
->efep
==efepNO
) && (ir
->bSimTemp
== FALSE
)) {
2480 for (i
=0;i
<efptNR
;i
++) {
2489 *fep_state
= fep
->init_fep_state
; /* this might overwrite the checkpoint
2490 if checkpoint is set -- a kludge is in for now
2492 for (i
=0;i
<efptNR
;i
++)
2494 /* overwrite lambda state with init_lambda for now for backwards compatibility */
2495 if (fep
->init_lambda
>=0) /* if it's -1, it was never initializd */
2497 lambda
[i
] = fep
->init_lambda
;
2499 lam0
[i
] = lambda
[i
];
2504 lambda
[i
] = fep
->all_lambda
[i
][*fep_state
];
2506 lam0
[i
] = lambda
[i
];
2511 /* need to rescale control temperatures to match current state */
2512 for (i
=0;i
<ir
->opts
.ngtc
;i
++) {
2513 if (ir
->opts
.ref_t
[i
] > 0) {
2514 ir
->opts
.ref_t
[i
] = ir
->simtempvals
->temperatures
[*fep_state
];
2520 /* Send to the log the information on the current lambdas */
2523 fprintf(fplog
,"Initial vector of lambda components:[ ");
2524 for (i
=0;i
<efptNR
;i
++)
2526 fprintf(fplog
,"%10.4f ",lambda
[i
]);
2528 fprintf(fplog
,"]\n");
2534 void init_md(FILE *fplog
,
2535 t_commrec
*cr
,t_inputrec
*ir
,const output_env_t oenv
,
2536 double *t
,double *t0
,
2537 real
*lambda
, int *fep_state
, double *lam0
,
2538 t_nrnb
*nrnb
,gmx_mtop_t
*mtop
,
2540 int nfile
,const t_filenm fnm
[],
2541 gmx_mdoutf_t
**outf
,t_mdebin
**mdebin
,
2542 tensor force_vir
,tensor shake_vir
,rvec mu_tot
,
2543 gmx_bool
*bSimAnn
,t_vcm
**vcm
, t_state
*state
, unsigned long Flags
)
2548 /* Initial values */
2549 *t
= *t0
= ir
->init_t
;
2552 for(i
=0;i
<ir
->opts
.ngtc
;i
++)
2554 /* set bSimAnn if any group is being annealed */
2555 if(ir
->opts
.annealing
[i
]!=eannNO
)
2562 update_annealing_target_temp(&(ir
->opts
),ir
->init_t
);
2565 /* Initialize lambda variables */
2566 initialize_lambdas(fplog
,ir
,fep_state
,lambda
,lam0
);
2570 *upd
= init_update(fplog
,ir
);
2576 *vcm
= init_vcm(fplog
,&mtop
->groups
,ir
);
2579 if (EI_DYNAMICS(ir
->eI
) && !(Flags
& MD_APPENDFILES
))
2581 if (ir
->etc
== etcBERENDSEN
)
2583 please_cite(fplog
,"Berendsen84a");
2585 if (ir
->etc
== etcVRESCALE
)
2587 please_cite(fplog
,"Bussi2007a");
2595 *outf
= init_mdoutf(nfile
,fnm
,Flags
,cr
,ir
,oenv
);
2597 *mdebin
= init_mdebin((Flags
& MD_APPENDFILES
) ? NULL
: (*outf
)->fp_ene
,
2598 mtop
,ir
, (*outf
)->fp_dhdl
);
2603 please_cite(fplog
,"Fritsch12");
2604 please_cite(fplog
,"Junghans10");
2606 /* Initiate variables */
2607 clear_mat(force_vir
);
2608 clear_mat(shake_vir
);