1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
40 #ifdef GMX_THREAD_SHM_FDECOMP
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel_c/nb_kernel_c.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
72 /* 1,4 interactions uses kernel 330 directly */
73 #include "nb_kernel_c/nb_kernel330.h"
75 #ifdef GMX_PPC_ALTIVEC
76 #include "nb_kernel_ppc_altivec/nb_kernel_ppc_altivec.h"
79 #if defined(GMX_IA32_SSE)
80 #include "nb_kernel_ia32_sse/nb_kernel_ia32_sse.h"
83 #if defined(GMX_IA32_SSE2)
84 #include "nb_kernel_ia32_sse2/nb_kernel_ia32_sse2.h"
87 #if defined(GMX_X86_64_SSE)
88 #include "nb_kernel_x86_64_sse/nb_kernel_x86_64_sse.h"
91 #if defined(GMX_X86_64_SSE2)
92 #include "nb_kernel_x86_64_sse2/nb_kernel_x86_64_sse2.h"
97 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
99 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
103 #if defined(GMX_FORTRAN)
105 # include "nb_kernel_f77_double/nb_kernel_f77_double.h"
107 # include "nb_kernel_f77_single/nb_kernel_f77_single.h"
111 #if (defined GMX_IA64_ASM && defined GMX_DOUBLE)
112 #include "nb_kernel_ia64_double/nb_kernel_ia64_double.h"
115 #if (defined GMX_IA64_ASM && !defined GMX_DOUBLE)
116 #include "nb_kernel_ia64_single/nb_kernel_ia64_single.h"
120 #include "nb_kernel_power6/nb_kernel_power6.h"
124 #include "nb_kernel_bluegene/nb_kernel_bluegene.h"
129 enum { TABLE_NONE
, TABLE_COMBINED
, TABLE_COUL
, TABLE_VDW
, TABLE_NR
};
132 /* Table version for each kernel.
135 nb_kernel_table
[eNR_NBKERNEL_NR
] =
137 TABLE_NONE
, /* kernel010 */
138 TABLE_NONE
, /* kernel020 */
139 TABLE_VDW
, /* kernel030 */
140 TABLE_NONE
, /* kernel100 */
141 TABLE_NONE
, /* kernel101 */
142 TABLE_NONE
, /* kernel102 */
143 TABLE_NONE
, /* kernel103 */
144 TABLE_NONE
, /* kernel104 */
145 TABLE_NONE
, /* kernel110 */
146 TABLE_NONE
, /* kernel111 */
147 TABLE_NONE
, /* kernel112 */
148 TABLE_NONE
, /* kernel113 */
149 TABLE_NONE
, /* kernel114 */
150 TABLE_NONE
, /* kernel120 */
151 TABLE_NONE
, /* kernel121 */
152 TABLE_NONE
, /* kernel122 */
153 TABLE_NONE
, /* kernel123 */
154 TABLE_NONE
, /* kernel124 */
155 TABLE_VDW
, /* kernel130 */
156 TABLE_VDW
, /* kernel131 */
157 TABLE_VDW
, /* kernel132 */
158 TABLE_VDW
, /* kernel133 */
159 TABLE_VDW
, /* kernel134 */
160 TABLE_NONE
, /* kernel200 */
161 TABLE_NONE
, /* kernel201 */
162 TABLE_NONE
, /* kernel202 */
163 TABLE_NONE
, /* kernel203 */
164 TABLE_NONE
, /* kernel204 */
165 TABLE_NONE
, /* kernel210 */
166 TABLE_NONE
, /* kernel211 */
167 TABLE_NONE
, /* kernel212 */
168 TABLE_NONE
, /* kernel213 */
169 TABLE_NONE
, /* kernel214 */
170 TABLE_NONE
, /* kernel220 */
171 TABLE_NONE
, /* kernel221 */
172 TABLE_NONE
, /* kernel222 */
173 TABLE_NONE
, /* kernel223 */
174 TABLE_NONE
, /* kernel224 */
175 TABLE_VDW
, /* kernel230 */
176 TABLE_VDW
, /* kernel231 */
177 TABLE_VDW
, /* kernel232 */
178 TABLE_VDW
, /* kernel233 */
179 TABLE_VDW
, /* kernel234 */
180 TABLE_COUL
, /* kernel300 */
181 TABLE_COUL
, /* kernel301 */
182 TABLE_COUL
, /* kernel302 */
183 TABLE_COUL
, /* kernel303 */
184 TABLE_COUL
, /* kernel304 */
185 TABLE_COUL
, /* kernel310 */
186 TABLE_COUL
, /* kernel311 */
187 TABLE_COUL
, /* kernel312 */
188 TABLE_COUL
, /* kernel313 */
189 TABLE_COUL
, /* kernel314 */
190 TABLE_COUL
, /* kernel320 */
191 TABLE_COUL
, /* kernel321 */
192 TABLE_COUL
, /* kernel322 */
193 TABLE_COUL
, /* kernel323 */
194 TABLE_COUL
, /* kernel324 */
195 TABLE_COMBINED
, /* kernel330 */
196 TABLE_COMBINED
, /* kernel331 */
197 TABLE_COMBINED
, /* kernel332 */
198 TABLE_COMBINED
, /* kernel333 */
199 TABLE_COMBINED
, /* kernel334 */
200 TABLE_NONE
, /* kernel400 */
201 TABLE_NONE
, /* kernel410 */
202 TABLE_VDW
/* kernel430 */
207 static nb_kernel_t
**
208 nb_kernel_list
= NULL
;
212 gmx_setup_kernels(FILE *fplog
,bool bGenericKernelOnly
)
216 snew(nb_kernel_list
,eNR_NBKERNEL_NR
);
218 /* Note that later calls overwrite earlier, so the preferred (fastest)
219 * version should be at the end. For instance, we call SSE after 3DNow.
222 for(i
=0; i
<eNR_NBKERNEL_NR
; i
++)
224 nb_kernel_list
[i
] = NULL
;
227 if (bGenericKernelOnly
)
234 fprintf(fplog
,"Configuring nonbonded kernels...\n");
237 nb_kernel_setup(fplog
,nb_kernel_list
);
239 if(getenv("GMX_NOOPTIMIZEDKERNELS") != NULL
)
244 /* Setup kernels. The last called setup routine will overwrite earlier assignments,
245 * so we should e.g. test SSE3 support _after_ SSE2 support,
246 * and call e.g. Fortran setup before SSE.
249 #if defined(GMX_FORTRAN) && defined(GMX_DOUBLE)
250 nb_kernel_setup_f77_double(fplog
,nb_kernel_list
);
253 #if defined(GMX_FORTRAN) && !defined(GMX_DOUBLE)
254 nb_kernel_setup_f77_single(fplog
,nb_kernel_list
);
258 nb_kernel_setup_bluegene(fplog
,nb_kernel_list
);
262 nb_kernel_setup_power6(fplog
,nb_kernel_list
);
265 #ifdef GMX_PPC_ALTIVEC
266 nb_kernel_setup_ppc_altivec(fplog
,nb_kernel_list
);
269 #if defined(GMX_IA32_SSE)
270 nb_kernel_setup_ia32_sse(fplog
,nb_kernel_list
);
273 #if defined(GMX_IA32_SSE2)
274 nb_kernel_setup_ia32_sse2(fplog
,nb_kernel_list
);
277 #if defined(GMX_X86_64_SSE)
278 nb_kernel_setup_x86_64_sse(fplog
,nb_kernel_list
);
281 #if defined(GMX_X86_64_SSE2)
282 nb_kernel_setup_x86_64_sse2(fplog
,nb_kernel_list
);
285 #if (defined GMX_IA64_ASM && defined GMX_DOUBLE)
286 nb_kernel_setup_ia64_double(fplog
,nb_kernel_list
);
289 #if (defined GMX_IA64_ASM && !defined GMX_DOUBLE)
290 nb_kernel_setup_ia64_single(fplog
,nb_kernel_list
);
295 fprintf(fplog
,"\n\n");
300 void do_nonbonded(t_commrec
*cr
,t_forcerec
*fr
,
301 rvec x
[],rvec f
[],t_mdatoms
*mdatoms
,t_blocka
*excl
,
302 real egnb
[],real egcoul
[],real egpol
[],rvec box_size
,
303 t_nrnb
*nrnb
,real lambda
,real
*dvdlambda
,
304 int nls
,int eNL
,int flags
)
306 bool bLR
,bDoForces
,bForeignLambda
;
309 int n
,n0
,n1
,i
,i0
,i1
,nrnb_ind
,sz
;
312 nb_kernel_t
* kernelptr
;
317 int outeriter
,inneriter
;
318 real
* tabledata
= NULL
;
321 bLR
= (flags
& GMX_DONB_LR
);
322 bDoForces
= (flags
& GMX_DONB_FORCES
);
323 bForeignLambda
= (flags
& GMX_DONB_FOREIGNLAMBDA
);
325 gbdata
.gb_epsilon_solvent
= fr
->gb_epsilon_solvent
;
326 gbdata
.epsilon_r
= fr
->epsilon_r
;
333 #if (defined GMX_SSE2 || defined GMX_X86_64_SSE || defined GMX_X86_64_SSE2 || defined GMX_IA32_SSE || defined GMX_IA32_SSE2)
335 if(fr
->UseOptimizedKernels
)
337 nb_kernel_allvsallgb_sse2_double(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,egpol
,
338 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
342 nb_kernel_allvsallgb(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,egpol
,
343 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
345 # else /* not double */
346 if(fr
->UseOptimizedKernels
)
348 nb_kernel_allvsallgb_sse2_single(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,egpol
,
349 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
353 nb_kernel_allvsallgb(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,egpol
,
354 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
356 # endif /* double/single alt. */
357 #else /* no SSE support compiled in */
358 nb_kernel_allvsallgb(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,egpol
,
359 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
361 inc_nrnb(nrnb
,eNR_NBKERNEL_ALLVSALLGB
,inneriter
);
365 #if (defined GMX_SSE2 || defined GMX_X86_64_SSE || defined GMX_X86_64_SSE2 || defined GMX_IA32_SSE || defined GMX_IA32_SSE2)
367 if(fr
->UseOptimizedKernels
)
369 nb_kernel_allvsall_sse2_double(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,
370 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
374 nb_kernel_allvsall(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,
375 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
378 # else /* not double */
379 if(fr
->UseOptimizedKernels
)
381 nb_kernel_allvsall_sse2_single(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,
382 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
386 nb_kernel_allvsall(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,
387 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
390 # endif /* double/single check */
391 #else /* No SSE2 support compiled in */
392 nb_kernel_allvsall(fr
,mdatoms
,excl
,x
[0],f
[0],egcoul
,egnb
,
393 &outeriter
,&inneriter
,&fr
->AllvsAll_work
);
396 inc_nrnb(nrnb
,eNR_NBKERNEL_ALLVSALL
,inneriter
);
398 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,outeriter
);
424 if(nb_kernel_list
== NULL
)
426 gmx_fatal(FARGS
,"gmx_setup_kernels has not been called");
429 fshift
= fr
->fshift
[0];
431 for(n
=n0
; (n
<n1
); n
++)
433 nblists
= &fr
->nblists
[n
];
434 for(i
=i0
; (i
<i1
); i
++)
436 outeriter
= inneriter
= 0;
440 nlist
= &(nblists
->nlist_lr
[i
]);
444 nlist
= &(nblists
->nlist_sr
[i
]);
449 nrnb_ind
= nlist
->il_code
;
451 if(nrnb_ind
==eNR_NBKERNEL_FREE_ENERGY
)
453 /* generic free energy, use combined table */
454 tabledata
= nblists
->tab
.tab
;
460 /* We don't need the non-perturbed interactions */
464 tabletype
= nb_kernel_table
[nrnb_ind
];
466 /* normal kernels, not free energy */
469 nrnb_ind
+= eNR_NBKERNEL_NR
/2;
472 if(tabletype
== TABLE_COMBINED
)
474 tabledata
= nblists
->tab
.tab
;
476 else if(tabletype
== TABLE_COUL
)
478 tabledata
= nblists
->coultab
;
480 else if(tabletype
== TABLE_VDW
)
482 tabledata
= nblists
->vdwtab
;
492 if(nlist
->free_energy
)
496 gmx_fatal(FARGS
,"Cannot do free energy Buckingham interactions.");
499 gmx_nb_free_energy_kernel(nlist
->icoul
,
535 else if (nlist
->enlist
== enlistCG_CG
)
537 /* Call the charge group based inner loop */
538 gmx_nb_generic_cg_kernel(nlist
,
553 /* Not free energy */
555 kernelptr
= nb_kernel_list
[nrnb_ind
];
557 if (kernelptr
== NULL
)
559 /* Call a generic nonbonded kernel */
561 /* If you want to hack/test your own interactions,
562 * do it in this routine and make sure it is called
563 * by setting the environment variable GMX_NB_GENERIC.
565 gmx_nb_generic_kernel(nlist
,
580 /* Call nonbonded kernel from function pointer */
582 (*kernelptr
)( &(nlist
->nri
),
601 &(nblists
->tab
.scale
),
616 /* Update flop accounting */
618 /* Outer loop in kernel */
619 switch (nlist
->enlist
) {
620 case enlistATOM_ATOM
: fac
= 1; break;
621 case enlistSPC_ATOM
: fac
= 3; break;
622 case enlistSPC_SPC
: fac
= 9; break;
623 case enlistTIP4P_ATOM
: fac
= 4; break;
624 case enlistTIP4P_TIP4P
: fac
= 16; break;
625 case enlistCG_CG
: fac
= 1; break;
627 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,fac
*outeriter
);
629 /* inner loop in kernel */
630 inc_nrnb(nrnb
,nrnb_ind
,inneriter
);
638 do_listed_vdw_q(int ftype
,int nbonds
,
639 const t_iatom iatoms
[],const t_iparams iparams
[],
640 const rvec x
[],rvec f
[],rvec fshift
[],
641 const t_pbc
*pbc
,const t_graph
*g
,
642 real lambda
,real
*dvdlambda
,
644 const t_forcerec
*fr
,gmx_grppairener_t
*grppener
,
645 int *global_atom_index
)
647 static bool bWarn
=FALSE
;
648 real eps
,r2
,*tab
,rtab2
=0;
649 rvec dx
,x14
[2],f14
[2];
651 int typeA
[2]={0,0},typeB
[2]={0,1};
652 real chargeA
[2]={0,0},chargeB
[2];
653 int gid
,shift_vir
,shift_f
;
654 int j_index
[] = { 0, 1 };
657 int outeriter
,inneriter
;
660 real krf
,crf
,tabscale
;
663 real
*egnb
=NULL
,*egcoul
=NULL
;
666 bool bMolPBC
,bFreeEnergy
;
668 #if GMX_THREAD_SHM_FDECOMP
675 #if GMX_THREAD_SHM_FDECOMP
676 pthread_mutex_initialize(&mtx
);
679 bMolPBC
= fr
->bMolPBC
;
684 eps
= fr
->epsfac
*fr
->fudgeQQ
;
686 egnb
= grppener
->ener
[egLJ14
];
687 egcoul
= grppener
->ener
[egCOUL14
];
692 egnb
= grppener
->ener
[egLJSR
];
693 egcoul
= grppener
->ener
[egCOULSR
];
696 gmx_fatal(FARGS
,"Unknown function type %d in do_nonbonded14",
700 rtab2
= sqr(fr
->tab14
.r
);
701 tabscale
= fr
->tab14
.scale
;
706 /* Determine the values for icoul/ivdw. */
710 else if(fr
->bcoultab
)
714 else if(fr
->eeltype
== eelRF_NEC
)
737 /* We don't do SSE or altivec here, due to large overhead for 4-fold
738 * unrolling on short lists
742 for(i
=0; (i
<nbonds
); )
747 gid
= GID(md
->cENER
[ai
],md
->cENER
[aj
],md
->nenergrp
);
752 (fr
->efep
!= efepNO
&&
753 ((md
->nPerturbed
&& (md
->bPerturbed
[ai
] || md
->bPerturbed
[aj
])) ||
754 iparams
[itype
].lj14
.c6A
!= iparams
[itype
].lj14
.c6B
||
755 iparams
[itype
].lj14
.c12A
!= iparams
[itype
].lj14
.c12B
));
756 chargeA
[0] = md
->chargeA
[ai
];
757 chargeA
[1] = md
->chargeA
[aj
];
758 nbfp
= (real
*)&(iparams
[itype
].lj14
.c6A
);
761 eps
= fr
->epsfac
*iparams
[itype
].ljc14
.fqq
;
762 chargeA
[0] = iparams
[itype
].ljc14
.qi
;
763 chargeA
[1] = iparams
[itype
].ljc14
.qj
;
764 nbfp
= (real
*)&(iparams
[itype
].ljc14
.c6
);
767 chargeA
[0] = iparams
[itype
].ljcnb
.qi
;
768 chargeA
[1] = iparams
[itype
].ljcnb
.qj
;
769 nbfp
= (real
*)&(iparams
[itype
].ljcnb
.c6
);
775 /* This is a bonded interaction, atoms are in the same box */
777 r2
= distance2(x
[ai
],x
[aj
]);
781 /* Apply full periodic boundary conditions */
782 shift_f
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
790 fprintf(stderr
,"Warning: 1-4 interaction between %d and %d "
791 "at distance %.3f which is larger than the 1-4 table size %.3f nm\n",
792 glatnr(global_atom_index
,ai
),
793 glatnr(global_atom_index
,aj
),
794 sqrt(r2
), sqrt(rtab2
));
795 fprintf(stderr
,"These are ignored for the rest of the simulation\n");
796 fprintf(stderr
,"This usually means your system is exploding,\n"
797 "if not, you should increase table-extension in your mdp file\n"
798 "or with user tables increase the table size\n");
802 fprintf(debug
,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
803 x
[ai
][XX
],x
[ai
][YY
],x
[ai
][ZZ
],
804 x
[aj
][XX
],x
[aj
][YY
],x
[aj
][ZZ
],
805 glatnr(global_atom_index
,ai
),
806 glatnr(global_atom_index
,aj
),
811 copy_rvec(x
[ai
],x14
[0]);
812 copy_rvec(x
[aj
],x14
[1]);
816 fprintf(debug
,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
817 md
->cENER
[ai
],md
->cENER
[aj
],md
->nenergrp
,gid
);
820 outeriter
= inneriter
= count
= 0;
823 chargeB
[0] = md
->chargeB
[ai
];
824 chargeB
[1] = md
->chargeB
[aj
];
825 /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
827 * Here we use that the LJ-14 parameters are stored in iparams
828 * as c6A,c12A,c6B,c12B, which are referenced correctly
829 * in the innerloops if we assign type combinations 0-0 and 0-1
830 * to atom pair ai-aj in topologies A and B respectively.
834 gmx_fatal(FARGS
,"Cannot do free energy Buckingham interactions.");
837 gmx_nb_free_energy_kernel(icoul
,
875 /* Not perturbed - call kernel 330 */
911 rvec_inc(f
[ai
],f14
[0]);
912 rvec_dec(f
[aj
],f14
[0]);
916 /* Correct the shift forces using the graph */
917 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
918 shift_vir
= IVEC2IS(dt
);
919 rvec_inc(fshift
[shift_vir
],f14
[0]);
920 rvec_dec(fshift
[CENTRAL
],f14
[0]);
923 /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */