1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
54 #include "gmx_fatal.h"
55 #include "mtop_util.h"
68 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
69 #include "genborn_sse2_double.h"
70 #include "genborn_allvsall_sse2_double.h"
73 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
74 #include "genborn_sse2_single.h"
75 #include "genborn_allvsall_sse2_single.h"
77 #endif /* GMX_DOUBLE */
79 #include "genborn_allvsall.h"
81 /*#define DISABLE_SSE*/
90 typedef struct gbtmpnbls
{
96 /* This function is exactly the same as the one in bondfree.c. The reason
97 * it is copied here is that the bonded gb-interactions are evaluated
98 * not in calc_bonds, but rather in calc_gb_forces
100 static int pbc_rvec_sub(const t_pbc
*pbc
,const rvec xi
,const rvec xj
,rvec dx
)
103 return pbc_dx_aiuc(pbc
,xi
,xj
,dx
);
111 int init_gb_nblist(int natoms
, t_nblist
*nl
)
113 nl
->maxnri
= natoms
*4;
123 /*nl->nltype = nltype;*/
125 srenew(nl
->iinr
, nl
->maxnri
);
126 srenew(nl
->gid
, nl
->maxnri
);
127 srenew(nl
->shift
, nl
->maxnri
);
128 srenew(nl
->jindex
, nl
->maxnri
+1);
135 int print_nblist(int natoms
, t_nblist
*nl
)
137 int i
,k
,ai
,aj
,nj0
,nj1
;
139 printf("genborn.c: print_nblist, natoms=%d\n",nl
->nri
);
141 for(i
=0;i
<nl
->nri
;i
++)
150 printf("ai=%d, aj=%d\n",ai
,aj
);
158 void gb_pd_send(t_commrec
*cr
, real
*send_data
, int nr
)
162 int *index
,*sendc
,*disp
;
164 snew(sendc
,cr
->nnodes
);
165 snew(disp
,cr
->nnodes
);
167 index
= pd_index(cr
);
170 /* Setup count/index arrays */
171 for(i
=0;i
<cr
->nnodes
;i
++)
173 sendc
[i
] = index
[i
+1]-index
[i
];
177 /* Do communication */
178 MPI_Gatherv(send_data
+index
[cur
],sendc
[cur
],GMX_MPI_REAL
,send_data
,sendc
,
179 disp
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
180 MPI_Bcast(send_data
,nr
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
186 int init_gb_plist(t_params
*p_list
)
189 p_list
->param
= NULL
;
196 int init_gb_still(const t_commrec
*cr
, t_forcerec
*fr
,
197 const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
198 gmx_genborn_t
*born
,int natoms
)
201 int i
,j
,i1
,i2
,k
,m
,nbond
,nang
,ia
,ib
,ic
,id
,nb
,idx
,idx2
,at
;
205 real r
,ri
,rj
,ri2
,ri3
,rj2
,r2
,r3
,r4
,rk
,ratio
,term
,h
,doffset
;
206 real p1
,p2
,p3
,factor
,cosine
,rab
,rbc
;
213 snew(born
->gpol_still_work
,natoms
+3);
219 pd_at_range(cr
,&at0
,&at1
);
221 for(i
=0;i
<natoms
;i
++)
238 doffset
= born
->gb_doffset
;
240 for(i
=0;i
<natoms
;i
++)
242 born
->gpol_globalindex
[i
]=born
->vsolv_globalindex
[i
]=
243 born
->gb_radius_globalindex
[i
]=0;
246 /* Compute atomic solvation volumes for Still method */
247 for(i
=0;i
<natoms
;i
++)
249 ri
=atype
->gb_radius
[atoms
->atom
[i
].type
];
250 born
->gb_radius_globalindex
[i
] = ri
;
252 born
->vsolv_globalindex
[i
]=(4*M_PI
/3)*r3
;
255 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
257 m
=idef
->il
[F_GB12
].iatoms
[j
];
258 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
259 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
261 r
=1.01*idef
->iparams
[m
].gb
.st
;
263 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
264 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
270 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
272 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
280 born
->vsolv_globalindex
[ia
] -= term
;
283 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
285 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
293 born
->vsolv_globalindex
[ib
] -= term
;
299 gmx_sum(natoms
,vsol
,cr
);
301 for(i
=0;i
<natoms
;i
++)
303 born
->vsolv_globalindex
[i
]=born
->vsolv_globalindex
[i
]-vsol
[i
];
307 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
310 for(j
=0;j
<natoms
;j
++)
312 if(born
->use_globalindex
[j
]==1)
314 born
->gpol_globalindex
[j
]=-0.5*ONE_4PI_EPS0
/
315 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
320 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
322 m
=idef
->il
[F_GB12
].iatoms
[j
];
323 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
324 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
326 r
=idef
->iparams
[m
].gb
.st
;
332 gp
[ia
]+=STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
333 gp
[ib
]+=STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
337 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
338 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
339 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
340 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
345 for(j
=0;j
<idef
->il
[F_GB13
].nr
;j
+=3)
347 m
=idef
->il
[F_GB13
].iatoms
[j
];
348 ia
=idef
->il
[F_GB13
].iatoms
[j
+1];
349 ib
=idef
->il
[F_GB13
].iatoms
[j
+2];
351 r
=idef
->iparams
[m
].gb
.st
;
356 gp
[ia
]+=STILL_P3
*born
->vsolv
[ib
]/r4
;
357 gp
[ib
]+=STILL_P3
*born
->vsolv
[ia
]/r4
;
361 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
362 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
363 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
364 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
370 gmx_sum(natoms
,gp
,cr
);
372 for(i
=0;i
<natoms
;i
++)
374 born
->gpol_globalindex
[i
]=born
->gpol_globalindex
[i
]+gp
[i
];
384 /* Initialize all GB datastructs and compute polarization energies */
385 int init_gb(gmx_genborn_t
**p_born
,
386 const t_commrec
*cr
, t_forcerec
*fr
, const t_inputrec
*ir
,
387 const gmx_mtop_t
*mtop
, real rgbradii
, int gb_algorithm
)
389 int i
,j
,m
,ai
,aj
,jj
,natoms
,nalloc
;
390 real rai
,sk
,p
,doffset
;
394 gmx_localtop_t
*localtop
;
396 natoms
= mtop
->natoms
;
398 atoms
= gmx_mtop_global_atoms(mtop
);
399 localtop
= gmx_mtop_generate_local_top(mtop
,ir
);
406 snew(born
->drobc
, natoms
);
407 snew(born
->bRad
, natoms
);
409 /* Allocate memory for the global data arrays */
410 snew(born
->param_globalindex
, natoms
+3);
411 snew(born
->gpol_globalindex
, natoms
+3);
412 snew(born
->vsolv_globalindex
, natoms
+3);
413 snew(born
->gb_radius_globalindex
, natoms
+3);
414 snew(born
->use_globalindex
, natoms
+3);
416 snew(fr
->invsqrta
, natoms
);
417 snew(fr
->dvda
, natoms
);
420 fr
->dadx_rawptr
= NULL
;
422 born
->gpol_still_work
= NULL
;
423 born
->gpol_hct_work
= NULL
;
425 /* snew(born->asurf,natoms); */
426 /* snew(born->dasurf,natoms); */
428 /* Initialize the gb neighbourlist */
429 init_gb_nblist(natoms
,&(fr
->gblist
));
431 /* Do the Vsites exclusions (if any) */
432 for(i
=0;i
<natoms
;i
++)
434 jj
= atoms
.atom
[i
].type
;
435 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
437 born
->use_globalindex
[i
] = 1;
441 born
->use_globalindex
[i
] = 0;
444 /* If we have a Vsite, put vs_globalindex[i]=0 */
445 if (C6 (fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
446 C12(fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
447 atoms
.atom
[i
].q
== 0)
449 born
->use_globalindex
[i
]=0;
453 /* Copy algorithm parameters from inputrecord to local structure */
454 born
->obc_alpha
= ir
->gb_obc_alpha
;
455 born
->obc_beta
= ir
->gb_obc_beta
;
456 born
->obc_gamma
= ir
->gb_obc_gamma
;
457 born
->gb_doffset
= ir
->gb_dielectric_offset
;
458 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
459 born
->epsilon_r
= ir
->epsilon_r
;
461 doffset
= born
->gb_doffset
;
463 /* Set the surface tension */
464 born
->sa_surface_tension
= ir
->sa_surface_tension
;
466 /* If Still model, initialise the polarisation energies */
467 if(gb_algorithm
==egbSTILL
)
469 init_gb_still(cr
, fr
,&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
474 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
475 else if(gb_algorithm
==egbHCT
|| gb_algorithm
==egbOBC
)
478 snew(born
->gpol_hct_work
, natoms
+3);
480 for(i
=0;i
<natoms
;i
++)
482 if(born
->use_globalindex
[i
]==1)
484 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
485 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
486 born
->param_globalindex
[i
] = sk
;
487 born
->gb_radius_globalindex
[i
] = rai
;
491 born
->param_globalindex
[i
] = 0;
492 born
->gb_radius_globalindex
[i
] = 0;
497 /* Allocate memory for work arrays for temporary use */
498 snew(born
->work
,natoms
+4);
499 snew(born
->count
,natoms
);
500 snew(born
->nblist_work
,natoms
);
502 /* Domain decomposition specific stuff */
511 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
512 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
513 gmx_genborn_t
*born
,t_mdatoms
*md
)
515 int i
,k
,n
,nj0
,nj1
,ai
,aj
,type
;
518 real gpi
,dr
,dr2
,dr4
,idr4
,rvdw
,ratio
,ccf
,theta
,term
,rai
,raj
;
519 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
520 real rinv
,idr2
,idr6
,vaj
,dccf
,cosq
,sinq
,prod
,gpi2
;
522 real vai
, prod_ai
, icf4
,icf6
;
524 factor
= 0.5*ONE_4PI_EPS0
;
527 for(i
=0;i
<born
->nr
;i
++)
529 born
->gpol_still_work
[i
]=0;
532 for(i
=0;i
<nl
->nri
;i
++ )
537 nj1
= nl
->jindex
[i
+1];
539 /* Load shifts for this list */
540 shift
= nl
->shift
[i
];
541 shX
= fr
->shift_vec
[shift
][0];
542 shY
= fr
->shift_vec
[shift
][1];
543 shZ
= fr
->shift_vec
[shift
][2];
547 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
548 vai
= born
->vsolv
[ai
];
549 prod_ai
= STILL_P4
*vai
;
551 /* Load atom i coordinates, add shift vectors */
552 ix1
= shX
+ x
[ai
][0];
553 iy1
= shY
+ x
[ai
][1];
554 iz1
= shZ
+ x
[ai
][2];
567 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
568 rinv
= gmx_invsqrt(dr2
);
573 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
577 ratio
= dr2
/ (rvdw
* rvdw
);
578 vaj
= born
->vsolv
[aj
];
580 if(ratio
>STILL_P5INV
)
587 theta
= ratio
*STILL_PIP5
;
589 term
= 0.5*(1.0-cosq
);
591 sinq
= 1.0 - cosq
*cosq
;
592 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
597 icf6
= (4*ccf
-dccf
)*idr6
;
599 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
602 /* Save ai->aj and aj->ai chain rule terms */
603 fr
->dadx
[n
++] = prod
*icf6
;
604 fr
->dadx
[n
++] = prod_ai
*icf6
;
606 born
->gpol_still_work
[ai
]+=gpi
;
609 /* Parallel summations */
612 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
614 else if(DOMAINDECOMP(cr
))
616 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
619 /* Calculate the radii */
620 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
622 if(born
->use
[i
] != 0)
625 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
627 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
628 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
632 /* Extra communication required for DD */
635 dd_atom_spread_real(cr
->dd
, born
->bRad
);
636 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
645 calc_gb_rad_hct(t_commrec
*cr
,t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
646 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
647 gmx_genborn_t
*born
,t_mdatoms
*md
)
649 int i
,k
,n
,ai
,aj
,nj0
,nj1
,at0
,at1
;
652 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk_ai
,sk2
,sk2_ai
,lij
,uij
,diff2
,tmp
,sum_ai
;
653 real rad
,min_rad
,rinv
,rai_inv
;
654 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
655 real lij2
, uij2
, lij3
, uij3
, t1
,t2
,t3
;
656 real lij_inv
,dlij
,duij
,sk2_rinv
,prod
,log_term
;
657 real doffset
,raj_inv
,dadx_val
;
660 doffset
= born
->gb_doffset
;
661 gb_radius
= born
->gb_radius
;
663 for(i
=0;i
<born
->nr
;i
++)
665 born
->gpol_hct_work
[i
] = 0;
668 /* Keep the compiler happy */
672 for(i
=0;i
<nl
->nri
;i
++)
677 nj1
= nl
->jindex
[i
+1];
679 /* Load shifts for this list */
680 shift
= nl
->shift
[i
];
681 shX
= fr
->shift_vec
[shift
][0];
682 shY
= fr
->shift_vec
[shift
][1];
683 shZ
= fr
->shift_vec
[shift
][2];
688 sk_ai
= born
->param
[ai
];
689 sk2_ai
= sk_ai
*sk_ai
;
691 /* Load atom i coordinates, add shift vectors */
692 ix1
= shX
+ x
[ai
][0];
693 iy1
= shY
+ x
[ai
][1];
694 iz1
= shZ
+ x
[ai
][2];
710 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
711 rinv
= gmx_invsqrt(dr2
);
714 sk
= born
->param
[aj
];
717 /* aj -> ai interaction */
738 lij_inv
= gmx_invsqrt(lij2
);
741 prod
= 0.25*sk2_rinv
;
743 log_term
= log(uij
*lij_inv
);
745 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
750 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
753 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
754 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
755 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
757 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
758 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
759 /* rb2 is moved to chainrule */
767 fr
->dadx
[n
++] = dadx_val
;
770 /* ai -> aj interaction */
773 lij
= 1.0/(dr
-sk_ai
);
786 uij
= 1.0/(dr
+sk_ai
);
792 lij_inv
= gmx_invsqrt(lij2
);
793 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
795 prod
= 0.25 * sk2_rinv
;
797 /* log_term = table_log(uij*lij_inv,born->log_table,
798 LOG_TABLE_ACCURACY); */
799 log_term
= log(uij
*lij_inv
);
801 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
806 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
810 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
811 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
812 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
814 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
815 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
817 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
823 fr
->dadx
[n
++] = dadx_val
;
826 born
->gpol_hct_work
[ai
] += sum_ai
;
829 /* Parallel summations */
832 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
834 else if(DOMAINDECOMP(cr
))
836 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
839 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
841 if(born
->use
[i
] != 0)
843 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
844 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
845 min_rad
= rai
+ doffset
;
848 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
849 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
853 /* Extra communication required for DD */
856 dd_atom_spread_real(cr
->dd
, born
->bRad
);
857 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
865 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_localtop_t
*top
,
866 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
)
868 int i
,k
,ai
,aj
,nj0
,nj1
,n
,at0
,at1
;
871 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk2
,lij
,uij
,diff2
,tmp
,sum_ai
;
872 real rad
, min_rad
,sum_ai2
,sum_ai3
,tsum
,tchain
,rinv
,rai_inv
,lij_inv
,rai_inv2
;
873 real log_term
,prod
,sk2_rinv
,sk_ai
,sk2_ai
;
874 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
875 real lij2
,uij2
,lij3
,uij3
,dlij
,duij
,t1
,t2
,t3
;
876 real doffset
,raj_inv
,dadx_val
;
879 /* Keep the compiler happy */
884 doffset
= born
->gb_doffset
;
885 gb_radius
= born
->gb_radius
;
887 for(i
=0;i
<born
->nr
;i
++)
889 born
->gpol_hct_work
[i
] = 0;
892 for(i
=0;i
<nl
->nri
;i
++)
897 nj1
= nl
->jindex
[i
+1];
899 /* Load shifts for this list */
900 shift
= nl
->shift
[i
];
901 shX
= fr
->shift_vec
[shift
][0];
902 shY
= fr
->shift_vec
[shift
][1];
903 shZ
= fr
->shift_vec
[shift
][2];
908 sk_ai
= born
->param
[ai
];
909 sk2_ai
= sk_ai
*sk_ai
;
911 /* Load atom i coordinates, add shift vectors */
912 ix1
= shX
+ x
[ai
][0];
913 iy1
= shY
+ x
[ai
][1];
914 iz1
= shZ
+ x
[ai
][2];
930 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
931 rinv
= gmx_invsqrt(dr2
);
934 /* sk is precalculated in init_gb() */
935 sk
= born
->param
[aj
];
938 /* aj -> ai interaction */
958 lij_inv
= gmx_invsqrt(lij2
);
961 prod
= 0.25*sk2_rinv
;
963 log_term
= log(uij
*lij_inv
);
965 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
969 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
973 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
974 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
975 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
977 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
985 fr
->dadx
[n
++] = dadx_val
;
987 /* ai -> aj interaction */
990 lij
= 1.0/(dr
-sk_ai
);
1003 uij
= 1.0/(dr
+sk_ai
);
1009 lij_inv
= gmx_invsqrt(lij2
);
1010 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
1011 sk2_rinv
= sk2
*rinv
;
1012 prod
= 0.25 * sk2_rinv
;
1014 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1015 log_term
= log(uij
*lij_inv
);
1017 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1021 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
1024 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1025 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1026 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1028 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1030 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
1037 fr
->dadx
[n
++] = dadx_val
;
1040 born
->gpol_hct_work
[ai
] += sum_ai
;
1044 /* Parallel summations */
1047 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
1049 else if(DOMAINDECOMP(cr
))
1051 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
1054 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
1056 if(born
->use
[i
] != 0)
1058 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
1062 sum_ai
= rai
* born
->gpol_hct_work
[i
];
1063 sum_ai2
= sum_ai
* sum_ai
;
1064 sum_ai3
= sum_ai2
* sum_ai
;
1066 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
1067 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
1068 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
1070 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
1072 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
1073 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
1077 /* Extra (local) communication required for DD */
1078 if(DOMAINDECOMP(cr
))
1080 dd_atom_spread_real(cr
->dd
, born
->bRad
);
1081 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
1082 dd_atom_spread_real(cr
->dd
, born
->drobc
);
1091 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
,gmx_localtop_t
*top
,
1092 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
,t_nrnb
*nrnb
)
1098 if(fr
->bAllvsAll
&& fr
->dadx
==NULL
)
1100 /* We might need up to 8 atoms of padding before and after,
1101 * and another 4 units to guarantee SSE alignment.
1103 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
1104 snew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1105 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1109 /* In the SSE-enabled gb-loops, when writing to dadx, we
1110 * always write 2*4 elements at a time, even in the case with only
1111 * 1-3 j particles, where we only really need to write 2*(1-3)
1112 * elements. This is because we want dadx to be aligned to a 16-
1113 * byte boundary, and being able to use _mm_store/load_ps
1115 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
1117 /* First, reallocate the dadx array, we need 3 extra for SSE */
1118 if (ndadx
+ 3 > fr
->nalloc_dadx
)
1120 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
1121 srenew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1122 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1128 cnt
= md
->homenr
*(md
->nr
/2+1);
1130 if(ir
->gb_algorithm
==egbSTILL
)
1132 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || (!defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1133 if(fr
->UseOptimizedKernels
)
1135 genborn_allvsall_calc_still_radii_sse2_single(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1139 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1141 #elif ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || (defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1142 if(fr
->UseOptimizedKernels
)
1144 genborn_allvsall_calc_still_radii_sse2_double(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1148 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1151 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1153 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_STILL
,cnt
);
1155 else if(ir
->gb_algorithm
==egbHCT
|| ir
->gb_algorithm
==egbOBC
)
1157 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || (!defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1158 if(fr
->UseOptimizedKernels
)
1160 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1164 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1166 #elif ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || (defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1167 if(fr
->UseOptimizedKernels
)
1169 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1173 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1176 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1178 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_HCT_OBC
,cnt
);
1182 gmx_fatal(FARGS
,"Bad gb algorithm for all-vs-all interactions");
1184 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1189 /* Switch for determining which algorithm to use for Born radii calculation */
1192 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1193 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1194 switch(ir
->gb_algorithm
)
1197 if(fr
->UseOptimizedKernels
)
1199 calc_gb_rad_still_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
);
1203 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1207 if(fr
->UseOptimizedKernels
)
1209 calc_gb_rad_hct_obc_sse2_double(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1213 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1217 if(fr
->UseOptimizedKernels
)
1219 calc_gb_rad_hct_obc_sse2_double(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1223 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1228 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1231 switch(ir
->gb_algorithm
)
1234 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1237 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1240 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1244 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1251 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ) )
1252 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1253 switch(ir
->gb_algorithm
)
1256 if(fr
->UseOptimizedKernels
)
1258 calc_gb_rad_still_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
);
1262 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1266 if(fr
->UseOptimizedKernels
)
1268 calc_gb_rad_hct_obc_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1272 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1277 if(fr
->UseOptimizedKernels
)
1279 calc_gb_rad_hct_obc_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1283 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1288 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1292 switch(ir
->gb_algorithm
)
1295 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1298 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1301 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1305 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1308 #endif /* Single precision sse */
1310 #endif /* Double or single precision */
1312 if(fr
->bAllvsAll
==FALSE
)
1314 switch(ir
->gb_algorithm
)
1317 inc_nrnb(nrnb
,eNR_BORN_RADII_STILL
,nl
->nrj
);
1321 inc_nrnb(nrnb
,eNR_BORN_RADII_HCT_OBC
,nl
->nrj
);
1327 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,nl
->nri
);
1335 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1336 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
, real epsilon_r
,
1337 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1339 int i
,j
,n0
,m
,nnn
,type
,ai
,aj
;
1345 real isaprod
,qq
,gbscale
,gbtabscale
,Y
,F
,Geps
,Heps2
,Fp
,VV
,FF
,rt
,eps
,eps2
;
1346 real vgb
,fgb
,vcoul
,fijC
,dvdatmp
,fscal
,dvdaj
;
1352 t_iatom
*forceatoms
;
1354 /* Scale the electrostatics by gb_epsilon_solvent */
1355 facel
= facel
* ((1.0/epsilon_r
) - 1.0/gb_epsilon_solvent
);
1357 gbtabscale
=*p_gbtabscale
;
1360 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1362 forceatoms
= idef
->il
[j
].iatoms
;
1364 for(i
=0;i
<idef
->il
[j
].nr
; )
1366 /* To avoid reading in the interaction type, we just increment i to pass over
1367 * the types in the forceatoms array, this saves some memory accesses
1370 ai
= forceatoms
[i
++];
1371 aj
= forceatoms
[i
++];
1373 ki
= pbc_rvec_sub(pbc
,x
[ai
],x
[aj
],dx
);
1374 rsq11
= iprod(dx
,dx
);
1376 isai
= invsqrta
[ai
];
1377 iq
= (-1)*facel
*charge
[ai
];
1379 rinv11
= gmx_invsqrt(rsq11
);
1380 isaj
= invsqrta
[aj
];
1381 isaprod
= isai
*isaj
;
1382 qq
= isaprod
*iq
*charge
[aj
];
1383 gbscale
= isaprod
*gbtabscale
;
1392 Geps
= eps
*GBtab
[nnn
+2];
1393 Heps2
= eps2
*GBtab
[nnn
+3];
1396 FF
= Fp
+Geps
+2.0*Heps2
;
1398 fijC
= qq
*FF
*gbscale
;
1399 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1400 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1401 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1402 vctot
= vctot
+ vgb
;
1403 fgb
= -(fijC
)*rinv11
;
1406 ivec_sub(SHIFT_IVEC(graph
,ai
),SHIFT_IVEC(graph
,aj
),dt
);
1410 for (m
=0; (m
<DIM
); m
++) { /* 15 */
1414 fshift
[ki
][m
]+=fscal
;
1415 fshift
[CENTRAL
][m
]-=fscal
;
1423 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1424 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, t_mdatoms
*md
, double facel
)
1427 real rai
,e
,derb
,q
,q2
,fi
,rai_inv
,vtot
;
1431 pd_at_range(cr
,&at0
,&at1
);
1433 else if(DOMAINDECOMP(cr
))
1436 at1
=cr
->dd
->nat_home
;
1445 /* Scale the electrostatics by gb_epsilon_solvent */
1446 facel
= facel
* ((1.0/born
->epsilon_r
) - 1.0/born
->gb_epsilon_solvent
);
1450 /* Apply self corrections */
1451 for(i
=at0
;i
<at1
;i
++)
1455 if(born
->use
[ai
]==1)
1457 rai
= born
->bRad
[ai
];
1463 derb
= 0.5*e
*rai_inv
*rai_inv
;
1464 dvda
[ai
] += derb
*rai
;
1473 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
,int natoms
,gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1474 const t_atomtypes
*atype
, real
*dvda
,int gb_algorithm
, t_mdatoms
*md
)
1477 real e
,es
,rai
,rbi
,term
,probe
,tmp
,factor
;
1478 real rbi_inv
,rbi_inv2
;
1480 /* To keep the compiler happy */
1485 pd_at_range(cr
,&at0
,&at1
);
1487 else if(DOMAINDECOMP(cr
))
1490 at1
= cr
->dd
->nat_home
;
1498 /* factor is the surface tension */
1499 factor
= born
->sa_surface_tension
;
1502 // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1503 if(gb_algorithm==egbSTILL)
1505 factor=0.0049*100*CAL2JOULE;
1509 factor=0.0054*100*CAL2JOULE;
1512 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1518 for(i
=at0
;i
<at1
;i
++)
1522 if(born
->use
[ai
]==1)
1524 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1525 rbi_inv
= fr
->invsqrta
[ai
];
1526 rbi_inv2
= rbi_inv
* rbi_inv
;
1527 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1529 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1530 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1540 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1541 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
, t_mdatoms
*md
)
1543 int i
,k
,n
,ai
,aj
,nj0
,nj1
,n0
,n1
;
1546 real fgb
,fij
,rb2
,rbi
,fix1
,fiy1
,fiz1
;
1547 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
,rsq11
;
1548 real rinv11
,tx
,ty
,tz
,rbai
,rbaj
,fgb_ai
;
1558 if(gb_algorithm
==egbSTILL
)
1562 rbi
= born
->bRad
[i
];
1563 rb
[i
] = (2 * rbi
* rbi
* dvda
[i
])/ONE_4PI_EPS0
;
1566 else if(gb_algorithm
==egbHCT
)
1570 rbi
= born
->bRad
[i
];
1571 rb
[i
] = rbi
* rbi
* dvda
[i
];
1574 else if(gb_algorithm
==egbOBC
)
1578 rbi
= born
->bRad
[i
];
1579 rb
[i
] = rbi
* rbi
* born
->drobc
[i
] * dvda
[i
];
1583 for(i
=0;i
<nl
->nri
;i
++)
1587 nj0
= nl
->jindex
[i
];
1588 nj1
= nl
->jindex
[i
+1];
1590 /* Load shifts for this list */
1591 shift
= nl
->shift
[i
];
1592 shX
= shift_vec
[shift
][0];
1593 shY
= shift_vec
[shift
][1];
1594 shZ
= shift_vec
[shift
][2];
1596 /* Load atom i coordinates, add shift vectors */
1597 ix1
= shX
+ x
[ai
][0];
1598 iy1
= shY
+ x
[ai
][1];
1599 iz1
= shZ
+ x
[ai
][2];
1607 for(k
=nj0
;k
<nj1
;k
++)
1621 fgb
= rbai
*dadx
[n
++];
1622 fgb_ai
= rbaj
*dadx
[n
++];
1624 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1635 /* Update force on atom aj */
1636 t
[aj
][0] = t
[aj
][0] - tx
;
1637 t
[aj
][1] = t
[aj
][1] - ty
;
1638 t
[aj
][2] = t
[aj
][2] - tz
;
1641 /* Update force and shift forces on atom ai */
1642 t
[ai
][0] = t
[ai
][0] + fix1
;
1643 t
[ai
][1] = t
[ai
][1] + fiy1
;
1644 t
[ai
][2] = t
[ai
][2] + fiz1
;
1646 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1647 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1648 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1657 calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
, const t_atomtypes
*atype
,
1658 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, int sa_algorithm
, t_nrnb
*nrnb
, gmx_bool bRad
,
1659 const t_pbc
*pbc
, const t_graph
*graph
, gmx_enerdata_t
*enerd
)
1666 const t_pbc
*pbc_null
;
1673 if(sa_algorithm
== esaAPPROX
)
1675 /* Do a simple ACE type approximation for the non-polar solvation */
1676 enerd
->term
[F_NPSOLVATION
] += calc_gb_nonpolar(cr
, fr
,born
->nr
, born
, top
, atype
, fr
->dvda
, gb_algorithm
,md
);
1679 /* Calculate the bonded GB-interactions using either table or analytical formula */
1680 enerd
->term
[F_GBPOL
] += gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1681 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1683 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1684 enerd
->term
[F_GBPOL
] += calc_gb_selfcorrections(cr
,born
->nr
,md
->chargeA
, born
, fr
->dvda
, md
, fr
->epsfac
);
1686 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1689 gmx_sum(md
->nr
,fr
->dvda
, cr
);
1691 else if(DOMAINDECOMP(cr
))
1693 dd_atom_sum_real(cr
->dd
,fr
->dvda
);
1694 dd_atom_spread_real(cr
->dd
,fr
->dvda
);
1699 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || (!defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1700 if(fr
->UseOptimizedKernels
)
1702 genborn_allvsall_calc_chainrule_sse2_single(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1706 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1708 #elif ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || (defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1709 if(fr
->UseOptimizedKernels
)
1711 genborn_allvsall_calc_chainrule_sse2_double(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1715 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1718 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1720 cnt
= md
->homenr
*(md
->nr
/2+1);
1721 inc_nrnb(nrnb
,eNR_BORN_AVA_CHAINRULE
,cnt
);
1722 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1728 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || (defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1729 if(fr
->UseOptimizedKernels
)
1731 calc_gb_chainrule_sse2_double(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1732 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1733 gb_algorithm
, born
, md
);
1737 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1738 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1741 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1742 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1747 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || (!defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
1748 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1749 if(fr
->UseOptimizedKernels
)
1751 calc_gb_chainrule_sse2_single(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1752 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1753 gb_algorithm
, born
, md
);
1757 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1758 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1762 /* Calculate the forces due to chain rule terms with non sse code */
1763 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1764 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1770 inc_nrnb(nrnb
,eNR_BORN_CHAINRULE
,fr
->gblist
.nrj
);
1771 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,fr
->gblist
.nri
);
1776 static void add_j_to_gblist(gbtmpnbl_t
*list
,int aj
)
1778 if (list
->naj
>= list
->aj_nalloc
)
1780 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1781 srenew(list
->aj
,list
->aj_nalloc
);
1784 list
->aj
[list
->naj
++] = aj
;
1787 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
,int shift
)
1791 /* Search the list with the same shift, if there is one */
1793 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1797 if (ind
== lists
->nlist
)
1799 if (lists
->nlist
== lists
->list_nalloc
)
1801 lists
->list_nalloc
++;
1802 srenew(lists
->list
,lists
->list_nalloc
);
1803 for(i
=lists
->nlist
; i
<lists
->list_nalloc
; i
++)
1805 lists
->list
[i
].aj
= NULL
;
1806 lists
->list
[i
].aj_nalloc
= 0;
1811 lists
->list
[lists
->nlist
].shift
= shift
;
1812 lists
->list
[lists
->nlist
].naj
= 0;
1816 return &lists
->list
[ind
];
1819 static void add_bondeds_to_gblist(t_ilist
*il
,
1820 gmx_bool bMolPBC
,t_pbc
*pbc
,t_graph
*g
,rvec
*x
,
1821 struct gbtmpnbls
*nls
)
1823 int ind
,j
,ai
,aj
,shift
,found
;
1829 for(ind
=0; ind
<il
->nr
; ind
+=3)
1831 ai
= il
->iatoms
[ind
+1];
1832 aj
= il
->iatoms
[ind
+2];
1837 rvec_sub(x
[ai
],x
[aj
],dx
);
1838 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
1839 shift
= IVEC2IS(dt
);
1843 shift
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
1846 /* Find the list for this shift or create one */
1847 list
= find_gbtmplist(&nls
[ai
],shift
);
1851 /* So that we do not add the same bond twice.
1852 * This happens with some constraints between 1-3 atoms
1853 * that are in the bond-list but should not be in the GB nb-list */
1854 for(j
=0;j
<list
->naj
;j
++)
1856 if (list
->aj
[j
] == aj
)
1866 gmx_incons("ai == aj");
1869 add_j_to_gblist(list
,aj
);
1875 compare_int (const void * a
, const void * b
)
1877 return ( *(int*)a
- *(int*)b
);
1882 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
, real gbcut
,
1883 rvec x
[], matrix box
,
1884 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1886 int i
,l
,ii
,j
,k
,n
,nj0
,nj1
,ai
,aj
,at0
,at1
,found
,shift
,s
;
1891 struct gbtmpnbls
*nls
;
1892 gbtmpnbl_t
*list
=NULL
;
1894 set_pbc(&pbc
,fr
->ePBC
,box
);
1895 nls
= born
->nblist_work
;
1897 for(i
=0;i
<born
->nr
;i
++)
1904 set_pbc_dd(&pbc
,fr
->ePBC
,cr
->dd
,TRUE
,box
);
1907 switch (gb_algorithm
)
1911 /* Loop over 1-2, 1-3 and 1-4 interactions */
1912 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1914 add_bondeds_to_gblist(&idef
->il
[j
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1918 /* Loop over 1-4 interactions */
1919 add_bondeds_to_gblist(&idef
->il
[F_GB14
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1922 gmx_incons("Unknown GB algorithm");
1925 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1926 for(n
=0; (n
<fr
->nnblists
); n
++)
1928 for(i
=0; (i
<eNL_NR
); i
++)
1930 nblist
=&(fr
->nblists
[n
].nlist_sr
[i
]);
1932 if (nblist
->nri
> 0 && (i
==eNL_VDWQQ
|| i
==eNL_QQ
))
1934 for(j
=0;j
<nblist
->nri
;j
++)
1936 ai
= nblist
->iinr
[j
];
1937 shift
= nblist
->shift
[j
];
1939 /* Find the list for this shift or create one */
1940 list
= find_gbtmplist(&nls
[ai
],shift
);
1942 nj0
= nblist
->jindex
[j
];
1943 nj1
= nblist
->jindex
[j
+1];
1945 /* Add all the j-atoms in the non-bonded list to the GB list */
1946 for(k
=nj0
;k
<nj1
;k
++)
1948 add_j_to_gblist(list
,nblist
->jjnr
[k
]);
1955 /* Zero out some counters */
1959 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1961 for(i
=0;i
<fr
->natoms_force
;i
++)
1963 for(s
=0; s
<nls
[i
].nlist
; s
++)
1965 list
= &nls
[i
].list
[s
];
1967 /* Only add those atoms that actually have neighbours */
1968 if (born
->use
[i
] != 0)
1970 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1971 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1974 for(k
=0; k
<list
->naj
; k
++)
1976 /* Memory allocation for jjnr */
1977 if(fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1979 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1983 fprintf(debug
,"Increasing GB neighbourlist j size to %d\n",fr
->gblist
.maxnrj
);
1986 srenew(fr
->gblist
.jjnr
,fr
->gblist
.maxnrj
);
1990 if(i
== list
->aj
[k
])
1992 gmx_incons("i == list->aj[k]");
1994 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1997 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
2004 for(i
=0;i
<fr
->gblist
.nri
;i
++)
2006 nj0
= fr
->gblist
.jindex
[i
];
2007 nj1
= fr
->gblist
.jindex
[i
+1];
2008 ai
= fr
->gblist
.iinr
[i
];
2011 for(j
=nj0
;j
<nj1
;j
++)
2013 if(fr
->gblist
.jjnr
[j
]<ai
)
2014 fr
->gblist
.jjnr
[j
]+=fr
->natoms_force
;
2016 qsort(fr
->gblist
.jjnr
+nj0
,nj1
-nj0
,sizeof(int),compare_int
);
2018 for(j
=nj0
;j
<nj1
;j
++)
2020 if(fr
->gblist
.jjnr
[j
]>=fr
->natoms_force
)
2021 fr
->gblist
.jjnr
[j
]-=fr
->natoms_force
;
2030 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
2033 gmx_domdec_t
*dd
=NULL
;
2035 if(DOMAINDECOMP(cr
))
2043 /* Single node or particle decomp (global==local), just copy pointers and return */
2044 if(gb_algorithm
==egbSTILL
)
2046 born
->gpol
= born
->gpol_globalindex
;
2047 born
->vsolv
= born
->vsolv_globalindex
;
2048 born
->gb_radius
= born
->gb_radius_globalindex
;
2052 born
->param
= born
->param_globalindex
;
2053 born
->gb_radius
= born
->gb_radius_globalindex
;
2056 born
->use
= born
->use_globalindex
;
2061 /* Reallocation of local arrays if necessary */
2062 /* fr->natoms_force is equal to dd->nat_tot */
2063 if (DOMAINDECOMP(cr
) && dd
->nat_tot
> born
->nalloc
)
2067 nalloc
= dd
->nat_tot
;
2069 /* Arrays specific to different gb algorithms */
2070 if (gb_algorithm
== egbSTILL
)
2072 srenew(born
->gpol
, nalloc
+3);
2073 srenew(born
->vsolv
, nalloc
+3);
2074 srenew(born
->gb_radius
, nalloc
+3);
2075 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2079 born
->gb_radius
[i
] = 0;
2084 srenew(born
->param
, nalloc
+3);
2085 srenew(born
->gb_radius
, nalloc
+3);
2086 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2089 born
->gb_radius
[i
] = 0;
2093 /* All gb-algorithms use the array for vsites exclusions */
2094 srenew(born
->use
, nalloc
+3);
2095 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2100 born
->nalloc
= nalloc
;
2103 /* With dd, copy algorithm specific arrays */
2104 if(gb_algorithm
==egbSTILL
)
2106 for(i
=at0
;i
<at1
;i
++)
2108 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
2109 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
2110 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2111 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
2116 for(i
=at0
;i
<at1
;i
++)
2118 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
2119 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2120 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];