1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
54 #include "gmx_fatal.h"
55 #include "mtop_util.h"
68 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
69 #include "genborn_sse2_double.h"
72 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
73 #include "genborn_sse2_single.h"
74 #include "genborn_allvsall_sse2_single.h"
76 #endif /* GMX_DOUBLE */
78 #include "genborn_allvsall.h"
80 /*#define DISABLE_SSE*/
89 typedef struct gbtmpnbls
{
95 /* This function is exactly the same as the one in bondfree.c. The reason
96 * it is copied here is that the bonded gb-interactions are evaluated
97 * not in calc_bonds, but rather in calc_gb_forces
99 static int pbc_rvec_sub(const t_pbc
*pbc
,const rvec xi
,const rvec xj
,rvec dx
)
102 return pbc_dx_aiuc(pbc
,xi
,xj
,dx
);
110 int init_gb_nblist(int natoms
, t_nblist
*nl
)
112 nl
->maxnri
= natoms
*4;
122 /*nl->nltype = nltype;*/
124 srenew(nl
->iinr
, nl
->maxnri
);
125 srenew(nl
->gid
, nl
->maxnri
);
126 srenew(nl
->shift
, nl
->maxnri
);
127 srenew(nl
->jindex
, nl
->maxnri
+1);
134 int print_nblist(int natoms
, t_nblist
*nl
)
136 int i
,k
,ai
,aj
,nj0
,nj1
;
138 printf("genborn.c: print_nblist, natoms=%d\n",nl
->nri
);
140 for(i
=0;i
<nl
->nri
;i
++)
149 printf("ai=%d, aj=%d\n",ai
,aj
);
161 void fill_log_table(const int n
, real
*table
)
167 int incr
= 1 << (23-n
);
170 logfactor
= 1.0/log(2.0);
172 log_table
.exp
= 0x3F800000;
176 /* log2(numlog)=log(numlog)/log(2.0) */
177 table
[i
]=log(log_table
.numlog
)*logfactor
;
183 real
table_log(real val
, const real
*table
, const int n
)
185 int *const exp_ptr
= ((int*)&val
);
187 const int log_2
= ((x
>>23) & 255) - 127;
191 return ((val
+log_2
)*0.69314718);
194 void gb_pd_send(t_commrec
*cr
, real
*send_data
, int nr
)
198 int *index
,*sendc
,*disp
;
200 snew(sendc
,cr
->nnodes
);
201 snew(disp
,cr
->nnodes
);
203 index
= pd_index(cr
);
206 /* Setup count/index arrays */
207 for(i
=0;i
<cr
->nnodes
;i
++)
209 sendc
[i
] = index
[i
+1]-index
[i
];
213 /* Do communication */
214 MPI_Gatherv(send_data
+index
[cur
],sendc
[cur
],GMX_MPI_REAL
,send_data
,sendc
,
215 disp
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
216 MPI_Bcast(send_data
,nr
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
222 int init_gb_plist(t_params
*p_list
)
225 p_list
->param
= NULL
;
232 int init_gb_still(const t_commrec
*cr
, t_forcerec
*fr
,
233 const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
234 gmx_genborn_t
*born
,int natoms
)
237 int i
,j
,i1
,i2
,k
,m
,nbond
,nang
,ia
,ib
,ic
,id
,nb
,idx
,idx2
,at
;
241 real r
,ri
,rj
,ri2
,ri3
,rj2
,r2
,r3
,r4
,rk
,ratio
,term
,h
,doffset
;
242 real p1
,p2
,p3
,factor
,cosine
,rab
,rbc
;
249 snew(born
->gpol_still_work
,natoms
+3);
255 pd_at_range(cr
,&at0
,&at1
);
257 for(i
=0;i
<natoms
;i
++)
274 doffset
= born
->gb_doffset
;
276 for(i
=0;i
<natoms
;i
++)
278 born
->gpol_globalindex
[i
]=born
->vsolv_globalindex
[i
]=
279 born
->gb_radius_globalindex
[i
]=0;
282 /* Compute atomic solvation volumes for Still method */
283 for(i
=0;i
<natoms
;i
++)
285 ri
=atype
->gb_radius
[atoms
->atom
[i
].type
];
286 born
->gb_radius_globalindex
[i
] = ri
;
288 born
->vsolv_globalindex
[i
]=(4*M_PI
/3)*r3
;
291 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
293 m
=idef
->il
[F_GB12
].iatoms
[j
];
294 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
295 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
297 r
=1.01*idef
->iparams
[m
].gb
.st
;
299 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
300 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
306 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
308 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
316 born
->vsolv_globalindex
[ia
] -= term
;
319 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
321 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
329 born
->vsolv_globalindex
[ib
] -= term
;
335 gmx_sum(natoms
,vsol
,cr
);
337 for(i
=0;i
<natoms
;i
++)
339 born
->vsolv_globalindex
[i
]=born
->vsolv_globalindex
[i
]-vsol
[i
];
343 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
346 for(j
=0;j
<natoms
;j
++)
348 if(born
->use_globalindex
[j
]==1)
350 born
->gpol_globalindex
[j
]=-0.5*ONE_4PI_EPS0
/
351 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
356 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
358 m
=idef
->il
[F_GB12
].iatoms
[j
];
359 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
360 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
362 r
=idef
->iparams
[m
].gb
.st
;
368 gp
[ia
]+=STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
369 gp
[ib
]+=STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
373 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
374 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
375 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
376 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
381 for(j
=0;j
<idef
->il
[F_GB13
].nr
;j
+=3)
383 m
=idef
->il
[F_GB13
].iatoms
[j
];
384 ia
=idef
->il
[F_GB13
].iatoms
[j
+1];
385 ib
=idef
->il
[F_GB13
].iatoms
[j
+2];
387 r
=idef
->iparams
[m
].gb
.st
;
392 gp
[ia
]+=STILL_P3
*born
->vsolv
[ib
]/r4
;
393 gp
[ib
]+=STILL_P3
*born
->vsolv
[ia
]/r4
;
397 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
398 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
399 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
400 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
406 gmx_sum(natoms
,gp
,cr
);
408 for(i
=0;i
<natoms
;i
++)
410 born
->gpol_globalindex
[i
]=born
->gpol_globalindex
[i
]+gp
[i
];
422 #define LOG_TABLE_ACCURACY 15 /* Accuracy of the table logarithm */
425 /* Initialize all GB datastructs and compute polarization energies */
426 int init_gb(gmx_genborn_t
**p_born
,
427 const t_commrec
*cr
, t_forcerec
*fr
, const t_inputrec
*ir
,
428 const gmx_mtop_t
*mtop
, real rgbradii
, int gb_algorithm
)
430 int i
,j
,m
,ai
,aj
,jj
,natoms
,nalloc
;
431 real rai
,sk
,p
,doffset
;
435 gmx_localtop_t
*localtop
;
437 natoms
= mtop
->natoms
;
439 atoms
= gmx_mtop_global_atoms(mtop
);
440 localtop
= gmx_mtop_generate_local_top(mtop
,ir
);
445 born
->nr
= fr
->natoms_force
;
448 snew(born
->drobc
, natoms
);
449 snew(born
->bRad
, natoms
);
451 /* Allocate memory for the global data arrays */
452 snew(born
->param_globalindex
, natoms
+3);
453 snew(born
->gpol_globalindex
, natoms
+3);
454 snew(born
->vsolv_globalindex
, natoms
+3);
455 snew(born
->gb_radius_globalindex
, natoms
+3);
456 snew(born
->use_globalindex
, natoms
+3);
458 snew(fr
->invsqrta
, natoms
);
459 snew(fr
->dvda
, natoms
);
462 fr
->dadx_rawptr
= NULL
;
464 born
->gpol_still_work
= NULL
;
465 born
->gpol_hct_work
= NULL
;
467 /* snew(born->asurf,natoms); */
468 /* snew(born->dasurf,natoms); */
470 /* Initialize the gb neighbourlist */
471 init_gb_nblist(natoms
,&(fr
->gblist
));
473 /* Do the Vsites exclusions (if any) */
474 for(i
=0;i
<natoms
;i
++)
476 jj
= atoms
.atom
[i
].type
;
477 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
479 born
->use_globalindex
[i
] = 1;
483 born
->use_globalindex
[i
] = 0;
486 /* If we have a Vsite, put vs_globalindex[i]=0 */
487 if (C6 (fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
488 C12(fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
489 atoms
.atom
[i
].q
== 0)
491 born
->use_globalindex
[i
]=0;
495 /* Copy algorithm parameters from inputrecord to local structure */
496 born
->obc_alpha
= ir
->gb_obc_alpha
;
497 born
->obc_beta
= ir
->gb_obc_beta
;
498 born
->obc_gamma
= ir
->gb_obc_gamma
;
499 born
->gb_doffset
= ir
->gb_dielectric_offset
;
500 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
501 born
->epsilon_r
= ir
->epsilon_r
;
503 doffset
= born
->gb_doffset
;
505 /* If Still model, initialise the polarisation energies */
506 if(gb_algorithm
==egbSTILL
)
508 init_gb_still(cr
, fr
,&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
513 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
514 else if(gb_algorithm
==egbHCT
|| gb_algorithm
==egbOBC
)
517 snew(born
->gpol_hct_work
, natoms
+3);
519 for(i
=0;i
<natoms
;i
++)
521 if(born
->use_globalindex
[i
]==1)
523 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
524 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
525 born
->param_globalindex
[i
] = sk
;
526 born
->gb_radius_globalindex
[i
] = rai
;
530 born
->param_globalindex
[i
] = 0;
531 born
->gb_radius_globalindex
[i
] = 0;
536 /* Init the logarithm table */
537 p
=pow(2,LOG_TABLE_ACCURACY
);
538 snew(born
->log_table
, p
);
540 fill_log_table(LOG_TABLE_ACCURACY
, born
->log_table
);
542 /* Allocate memory for work arrays for temporary use */
543 snew(born
->work
,natoms
+4);
544 snew(born
->count
,natoms
);
545 snew(born
->nblist_work
,natoms
);
547 /* Domain decomposition specific stuff */
550 snew(born
->dd_work
,natoms
);
551 born
->nlocal
= cr
->dd
->nat_tot
; /* cr->dd->nat_tot will be zero here */
560 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
561 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
562 gmx_genborn_t
*born
,t_mdatoms
*md
)
564 int i
,k
,n
,nj0
,nj1
,ai
,aj
,type
;
567 real gpi
,dr
,dr2
,dr4
,idr4
,rvdw
,ratio
,ccf
,theta
,term
,rai
,raj
;
568 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
569 real rinv
,idr2
,idr6
,vaj
,dccf
,cosq
,sinq
,prod
,gpi2
;
571 real vai
, prod_ai
, icf4
,icf6
;
573 factor
= 0.5*ONE_4PI_EPS0
;
576 for(i
=0;i
<born
->nr
;i
++)
578 born
->gpol_still_work
[i
]=0;
581 for(i
=0;i
<nl
->nri
;i
++ )
586 nj1
= nl
->jindex
[i
+1];
588 /* Load shifts for this list */
589 shift
= nl
->shift
[i
];
590 shX
= fr
->shift_vec
[shift
][0];
591 shY
= fr
->shift_vec
[shift
][1];
592 shZ
= fr
->shift_vec
[shift
][2];
596 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
597 vai
= born
->vsolv
[ai
];
598 prod_ai
= STILL_P4
*vai
;
600 /* Load atom i coordinates, add shift vectors */
601 ix1
= shX
+ x
[ai
][0];
602 iy1
= shY
+ x
[ai
][1];
603 iz1
= shZ
+ x
[ai
][2];
616 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
617 rinv
= gmx_invsqrt(dr2
);
622 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
626 ratio
= dr2
/ (rvdw
* rvdw
);
627 vaj
= born
->vsolv
[aj
];
629 if(ratio
>STILL_P5INV
)
636 theta
= ratio
*STILL_PIP5
;
638 term
= 0.5*(1.0-cosq
);
640 sinq
= 1.0 - cosq
*cosq
;
641 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
646 icf6
= (4*ccf
-dccf
)*idr6
;
648 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
651 /* Save ai->aj and aj->ai chain rule terms */
652 fr
->dadx
[n
++] = prod
*icf6
;
653 fr
->dadx
[n
++] = prod_ai
*icf6
;
655 born
->gpol_still_work
[ai
]+=gpi
;
658 /* Parallel summations */
661 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
663 else if(DOMAINDECOMP(cr
))
665 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
668 /* Calculate the radii */
669 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
671 if(born
->use
[i
] != 0)
674 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
676 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
677 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
681 /* Extra communication required for DD */
684 dd_atom_spread_real(cr
->dd
, born
->bRad
);
685 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
694 calc_gb_rad_hct(t_commrec
*cr
,t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
695 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
696 gmx_genborn_t
*born
,t_mdatoms
*md
)
698 int i
,k
,n
,ai
,aj
,nj0
,nj1
,at0
,at1
;
701 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk_ai
,sk2
,sk2_ai
,lij
,uij
,diff2
,tmp
,sum_ai
;
702 real rad
,min_rad
,rinv
,rai_inv
;
703 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
704 real lij2
, uij2
, lij3
, uij3
, t1
,t2
,t3
;
705 real lij_inv
,dlij
,duij
,sk2_rinv
,prod
,log_term
;
706 real doffset
,raj_inv
,dadx_val
;
709 doffset
= born
->gb_doffset
;
710 gb_radius
= born
->gb_radius
;
712 for(i
=0;i
<born
->nr
;i
++)
714 born
->gpol_hct_work
[i
] = 0;
717 /* Keep the compiler happy */
721 for(i
=0;i
<nl
->nri
;i
++)
725 nj0
= nl
->jindex
[ai
];
726 nj1
= nl
->jindex
[ai
+1];
728 /* Load shifts for this list */
729 shift
= nl
->shift
[i
];
730 shX
= fr
->shift_vec
[shift
][0];
731 shY
= fr
->shift_vec
[shift
][1];
732 shZ
= fr
->shift_vec
[shift
][2];
737 sk_ai
= born
->param
[ai
];
738 sk2_ai
= sk_ai
*sk_ai
;
740 /* Load atom i coordinates, add shift vectors */
741 ix1
= shX
+ x
[ai
][0];
742 iy1
= shY
+ x
[ai
][1];
743 iz1
= shZ
+ x
[ai
][2];
759 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
760 rinv
= gmx_invsqrt(dr2
);
763 sk
= born
->param
[aj
];
766 /* aj -> ai interaction */
787 lij_inv
= gmx_invsqrt(lij2
);
790 prod
= 0.25*sk2_rinv
;
792 /* log_term = table_log(uij*lij_inv,born->log_table,
793 LOG_TABLE_ACCURACY); */
794 log_term
= log(uij
*lij_inv
);
796 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
801 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
804 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
805 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
806 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
808 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
809 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
810 /* rb2 is moved to chainrule */
818 fr
->dadx
[n
++] = dadx_val
;
821 /* ai -> aj interaction */
824 lij
= 1.0/(dr
-sk_ai
);
837 uij
= 1.0/(dr
+sk_ai
);
843 lij_inv
= gmx_invsqrt(lij2
);
844 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
846 prod
= 0.25 * sk2_rinv
;
848 /* log_term = table_log(uij*lij_inv,born->log_table,
849 LOG_TABLE_ACCURACY); */
850 log_term
= log(uij
*lij_inv
);
852 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
857 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
861 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
862 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
863 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
865 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
866 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
868 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
874 fr
->dadx
[n
++] = dadx_val
;
877 born
->gpol_hct_work
[ai
] += sum_ai
;
880 /* Parallel summations */
883 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
885 else if(DOMAINDECOMP(cr
))
887 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
890 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
892 if(born
->use
[i
] != 0)
894 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
895 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
896 min_rad
= rai
+ doffset
;
899 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
900 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
904 /* Extra communication required for DD */
907 dd_atom_spread_real(cr
->dd
, born
->bRad
);
908 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
916 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_localtop_t
*top
,
917 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
)
919 int i
,k
,ai
,aj
,nj0
,nj1
,n
,at0
,at1
;
922 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk2
,lij
,uij
,diff2
,tmp
,sum_ai
;
923 real rad
, min_rad
,sum_ai2
,sum_ai3
,tsum
,tchain
,rinv
,rai_inv
,lij_inv
,rai_inv2
;
924 real log_term
,prod
,sk2_rinv
,sk_ai
,sk2_ai
;
925 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
926 real lij2
,uij2
,lij3
,uij3
,dlij
,duij
,t1
,t2
,t3
;
927 real doffset
,raj_inv
,dadx_val
;
930 /* Keep the compiler happy */
935 doffset
= born
->gb_doffset
;
936 gb_radius
= born
->gb_radius
;
938 for(i
=0;i
<born
->nr
;i
++)
940 born
->gpol_hct_work
[i
] = 0;
943 for(i
=0;i
<nl
->nri
;i
++)
948 nj1
= nl
->jindex
[i
+1];
950 /* Load shifts for this list */
951 shift
= nl
->shift
[i
];
952 shX
= fr
->shift_vec
[shift
][0];
953 shY
= fr
->shift_vec
[shift
][1];
954 shZ
= fr
->shift_vec
[shift
][2];
959 sk_ai
= born
->param
[ai
];
960 sk2_ai
= sk_ai
*sk_ai
;
962 /* Load atom i coordinates, add shift vectors */
963 ix1
= shX
+ x
[ai
][0];
964 iy1
= shY
+ x
[ai
][1];
965 iz1
= shZ
+ x
[ai
][2];
981 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
982 rinv
= gmx_invsqrt(dr2
);
985 /* sk is precalculated in init_gb() */
986 sk
= born
->param
[aj
];
989 /* aj -> ai interaction */
1009 lij_inv
= gmx_invsqrt(lij2
);
1011 sk2_rinv
= sk2
*rinv
;
1012 prod
= 0.25*sk2_rinv
;
1014 log_term
= log(uij
*lij_inv
);
1016 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1017 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1021 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
1025 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1026 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1027 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1029 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1037 fr
->dadx
[n
++] = dadx_val
;
1039 /* ai -> aj interaction */
1040 if(raj
< dr
+ sk_ai
)
1042 lij
= 1.0/(dr
-sk_ai
);
1055 uij
= 1.0/(dr
+sk_ai
);
1061 lij_inv
= gmx_invsqrt(lij2
);
1062 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
1063 sk2_rinv
= sk2
*rinv
;
1064 prod
= 0.25 * sk2_rinv
;
1066 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1067 log_term
= log(uij
*lij_inv
);
1069 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1073 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
1076 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1077 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1078 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1080 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1082 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
1089 fr
->dadx
[n
++] = dadx_val
;
1092 born
->gpol_hct_work
[ai
] += sum_ai
;
1096 /* Parallel summations */
1099 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
1101 else if(DOMAINDECOMP(cr
))
1103 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
1106 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
1108 if(born
->use
[i
] != 0)
1110 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
1114 sum_ai
= rai
* born
->gpol_hct_work
[i
];
1115 sum_ai2
= sum_ai
* sum_ai
;
1116 sum_ai3
= sum_ai2
* sum_ai
;
1118 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
1119 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
1120 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
1122 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
1124 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
1125 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
1129 /* Extra (local) communication required for DD */
1130 if(DOMAINDECOMP(cr
))
1132 dd_atom_spread_real(cr
->dd
, born
->bRad
);
1133 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
1134 dd_atom_spread_real(cr
->dd
, born
->drobc
);
1143 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
,gmx_localtop_t
*top
,
1144 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
,t_nrnb
*nrnb
)
1150 if(fr
->bAllvsAll
&& fr
->dadx
==NULL
)
1152 /* We might need up to 8 atoms of padding before and after,
1153 * and another 4 units to guarantee SSE alignment.
1155 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
1156 snew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1157 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1161 /* In the SSE-enabled gb-loops, when writing to dadx, we
1162 * always write 2*4 elements at a time, even in the case with only
1163 * 1-3 j particles, where we only really need to write 2*(1-3)
1164 * elements. This is because we want dadx to be aligned to a 16-
1165 * byte boundary, and being able to use _mm_store/load_ps
1167 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
1169 /* First, reallocate the dadx array, we need 3 extra for SSE */
1170 if (ndadx
+ 3 > fr
->nalloc_dadx
)
1172 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
1173 srenew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1174 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1181 cnt
= md
->homenr
*(md
->nr
/2+1);
1183 if(ir
->gb_algorithm
==egbSTILL
)
1185 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1186 genborn_allvsall_calc_still_radii_sse2_single(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1188 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1190 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_STILL
,cnt
);
1192 else if(ir
->gb_algorithm
==egbHCT
|| ir
->gb_algorithm
==egbOBC
)
1194 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1195 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1197 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1199 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_HCT_OBC
,cnt
);
1203 gmx_fatal(FARGS
,"Bad gb algorithm for all-vs-all interactions");
1205 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1211 /* Switch for determining which algorithm to use for Born radii calculation */
1214 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1215 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1216 switch(ir
->gb_algorithm
)
1219 calc_gb_rad_still_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1222 calc_gb_rad_hct_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1225 calc_gb_rad_obc_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1229 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1232 switch(ir
->gb_algorithm
)
1235 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1238 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1241 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1245 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1252 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ) )
1253 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1254 switch(ir
->gb_algorithm
)
1257 calc_gb_rad_still_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
);
1261 calc_gb_rad_hct_obc_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1265 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1269 switch(ir
->gb_algorithm
)
1272 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1275 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1278 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1282 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1285 #endif /* Single precision sse */
1287 #endif /* Double or single precision */
1289 if(fr
->bAllvsAll
==FALSE
)
1291 switch(ir
->gb_algorithm
)
1294 inc_nrnb(nrnb
,eNR_BORN_RADII_STILL
,nl
->nrj
);
1298 inc_nrnb(nrnb
,eNR_BORN_RADII_HCT_OBC
,nl
->nrj
);
1304 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,nl
->nri
);
1312 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1313 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
, real epsilon_r
,
1314 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1316 int i
,j
,n0
,m
,nnn
,type
,ai
,aj
;
1322 real isaprod
,qq
,gbscale
,gbtabscale
,Y
,F
,Geps
,Heps2
,Fp
,VV
,FF
,rt
,eps
,eps2
;
1323 real vgb
,fgb
,vcoul
,fijC
,dvdatmp
,fscal
,dvdaj
;
1329 t_iatom
*forceatoms
;
1331 /* Scale the electrostatics by gb_epsilon_solvent */
1332 facel
= facel
* ((1.0/epsilon_r
) - 1.0/gb_epsilon_solvent
);
1334 gbtabscale
=*p_gbtabscale
;
1337 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1339 forceatoms
= idef
->il
[j
].iatoms
;
1341 for(i
=0;i
<idef
->il
[j
].nr
; )
1343 /* To avoid reading in the interaction type, we just increment i to pass over
1344 * the types in the forceatoms array, this saves some memory accesses
1347 ai
= forceatoms
[i
++];
1348 aj
= forceatoms
[i
++];
1350 ki
= pbc_rvec_sub(pbc
,x
[ai
],x
[aj
],dx
);
1351 rsq11
= iprod(dx
,dx
);
1353 isai
= invsqrta
[ai
];
1354 iq
= (-1)*facel
*charge
[ai
];
1356 rinv11
= gmx_invsqrt(rsq11
);
1357 isaj
= invsqrta
[aj
];
1358 isaprod
= isai
*isaj
;
1359 qq
= isaprod
*iq
*charge
[aj
];
1360 gbscale
= isaprod
*gbtabscale
;
1369 Geps
= eps
*GBtab
[nnn
+2];
1370 Heps2
= eps2
*GBtab
[nnn
+3];
1373 FF
= Fp
+Geps
+2.0*Heps2
;
1375 fijC
= qq
*FF
*gbscale
;
1376 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1377 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1378 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1379 vctot
= vctot
+ vgb
;
1380 fgb
= -(fijC
)*rinv11
;
1383 ivec_sub(SHIFT_IVEC(graph
,ai
),SHIFT_IVEC(graph
,aj
),dt
);
1387 for (m
=0; (m
<DIM
); m
++) { /* 15 */
1391 fshift
[ki
][m
]+=fscal
;
1392 fshift
[CENTRAL
][m
]-=fscal
;
1400 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1401 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, t_mdatoms
*md
, double facel
)
1404 real rai
,e
,derb
,q
,q2
,fi
,rai_inv
,vtot
;
1408 pd_at_range(cr
,&at0
,&at1
);
1410 else if(DOMAINDECOMP(cr
))
1413 at1
=cr
->dd
->nat_home
;
1422 /* Scale the electrostatics by gb_epsilon_solvent */
1423 facel
= facel
* ((1.0/born
->epsilon_r
) - 1.0/born
->gb_epsilon_solvent
);
1427 /* Apply self corrections */
1428 for(i
=at0
;i
<at1
;i
++)
1432 if(born
->use
[ai
]==1)
1434 rai
= born
->bRad
[ai
];
1440 derb
= 0.5*e
*rai_inv
*rai_inv
;
1441 dvda
[ai
] += derb
*rai
;
1450 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
,int natoms
,gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1451 const t_atomtypes
*atype
, real
*dvda
,int gb_algorithm
, t_mdatoms
*md
)
1454 real e
,es
,rai
,rbi
,term
,probe
,tmp
,factor
;
1455 real rbi_inv
,rbi_inv2
;
1457 /* To keep the compiler happy */
1462 pd_at_range(cr
,&at0
,&at1
);
1464 else if(DOMAINDECOMP(cr
))
1467 at1
= cr
->dd
->nat_home
;
1475 /* The surface area factor is 0.0049 for Still model, 0.0054 for HCT/OBC */
1476 if(gb_algorithm
==egbSTILL
)
1478 factor
=0.0049*100*CAL2JOULE
;
1482 factor
=0.0054*100*CAL2JOULE
;
1485 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1491 for(i
=at0
;i
<at1
;i
++)
1495 if(born
->use
[ai
]==1)
1497 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1498 rbi_inv
= fr
->invsqrta
[ai
];
1499 rbi_inv2
= rbi_inv
* rbi_inv
;
1500 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1502 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1503 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1513 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1514 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
, t_mdatoms
*md
)
1516 int i
,k
,n
,ai
,aj
,nj0
,nj1
,n0
,n1
;
1519 real fgb
,fij
,rb2
,rbi
,fix1
,fiy1
,fiz1
;
1520 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
,rsq11
;
1521 real rinv11
,tx
,ty
,tz
,rbai
,rbaj
,fgb_ai
;
1530 n1
= md
->start
+md
->homenr
+1+natoms
/2;
1532 if(gb_algorithm
==egbSTILL
)
1537 rbi
= born
->bRad
[k
];
1538 rb
[k
] = (2 * rbi
* rbi
* dvda
[k
])/ONE_4PI_EPS0
;
1541 else if(gb_algorithm
==egbHCT
)
1546 rbi
= born
->bRad
[k
];
1547 rb
[k
] = rbi
* rbi
* dvda
[k
];
1550 else if(gb_algorithm
==egbOBC
)
1555 rbi
= born
->bRad
[k
];
1556 rb
[k
] = rbi
* rbi
* born
->drobc
[k
] * dvda
[k
];
1560 for(i
=0;i
<nl
->nri
;i
++)
1564 nj0
= nl
->jindex
[i
];
1565 nj1
= nl
->jindex
[i
+1];
1567 /* Load shifts for this list */
1568 shift
= nl
->shift
[i
];
1569 shX
= shift_vec
[shift
][0];
1570 shY
= shift_vec
[shift
][1];
1571 shZ
= shift_vec
[shift
][2];
1573 /* Load atom i coordinates, add shift vectors */
1574 ix1
= shX
+ x
[ai
][0];
1575 iy1
= shY
+ x
[ai
][1];
1576 iz1
= shZ
+ x
[ai
][2];
1584 for(k
=nj0
;k
<nj1
;k
++)
1598 fgb
= rbai
*dadx
[n
++];
1599 fgb_ai
= rbaj
*dadx
[n
++];
1601 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1612 /* Update force on atom aj */
1613 t
[aj
][0] = t
[aj
][0] - tx
;
1614 t
[aj
][1] = t
[aj
][1] - ty
;
1615 t
[aj
][2] = t
[aj
][2] - tz
;
1618 /* Update force and shift forces on atom ai */
1619 t
[ai
][0] = t
[ai
][0] + fix1
;
1620 t
[ai
][1] = t
[ai
][1] + fiy1
;
1621 t
[ai
][2] = t
[ai
][2] + fiz1
;
1623 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1624 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1625 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1633 real
calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
, const t_atomtypes
*atype
,
1634 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, t_nrnb
*nrnb
, bool bRad
,
1635 const t_pbc
*pbc
, const t_graph
*graph
)
1641 const t_pbc
*pbc_null
;
1650 /* Do a simple ACE type approximation for the non-polar solvation */
1651 v
+= calc_gb_nonpolar(cr
, fr
,born
->nr
, born
, top
, atype
, fr
->dvda
, gb_algorithm
,md
);
1653 /* Calculate the bonded GB-interactions using either table or analytical formula */
1655 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1656 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1658 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) ) /*
1659 v += gb_bonds_analytic(x[0],f[0],md->chargeA,born->bRad,fr->dvda,idef,born->epsilon_r,born->gb_epsilon_solvent,fr->epsfac);
1661 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1662 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1665 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1666 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1670 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1671 v
+= calc_gb_selfcorrections(cr
,born
->nr
,md
->chargeA
, born
, fr
->dvda
, md
, fr
->epsfac
);
1673 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1676 gmx_sum(md
->nr
,fr
->dvda
, cr
);
1678 else if(DOMAINDECOMP(cr
))
1680 dd_atom_sum_real(cr
->dd
,fr
->dvda
);
1681 dd_atom_spread_real(cr
->dd
,fr
->dvda
);
1687 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1688 genborn_allvsall_calc_chainrule_sse2_single(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1690 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1692 cnt
= md
->homenr
*(md
->nr
/2+1);
1693 inc_nrnb(nrnb
,eNR_BORN_AVA_CHAINRULE
,cnt
);
1694 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1701 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1702 calc_gb_chainrule_sse2_double(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1703 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1704 gb_algorithm
, born
);
1706 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1707 x
, f
, fr
->fshift
, fr
->shift_vec
,
1708 gb_algorithm
, born
, md
);
1713 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ))
1714 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1715 calc_gb_chainrule_sse(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1716 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1717 gb_algorithm
, born
, md
);
1719 /* Calculate the forces due to chain rule terms with non sse code */
1720 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1721 x
, f
, fr
->fshift
, fr
->shift_vec
,
1722 gb_algorithm
, born
, md
);
1728 inc_nrnb(nrnb
,eNR_BORN_CHAINRULE
,fr
->gblist
.nrj
);
1729 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,fr
->gblist
.nri
);
1737 static void add_j_to_gblist(gbtmpnbl_t
*list
,int aj
)
1739 if (list
->naj
>= list
->aj_nalloc
)
1741 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1742 srenew(list
->aj
,list
->aj_nalloc
);
1745 list
->aj
[list
->naj
++] = aj
;
1748 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
,int shift
)
1752 /* Search the list with the same shift, if there is one */
1754 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1758 if (ind
== lists
->nlist
)
1760 if (lists
->nlist
== lists
->list_nalloc
)
1762 lists
->list_nalloc
++;
1763 srenew(lists
->list
,lists
->list_nalloc
);
1764 for(i
=lists
->nlist
; i
<lists
->list_nalloc
; i
++)
1766 lists
->list
[i
].aj
= NULL
;
1767 lists
->list
[i
].aj_nalloc
= 0;
1772 lists
->list
[lists
->nlist
].shift
= shift
;
1773 lists
->list
[lists
->nlist
].naj
= 0;
1777 return &lists
->list
[ind
];
1780 static void add_bondeds_to_gblist(t_ilist
*il
,
1781 bool bMolPBC
,t_pbc
*pbc
,t_graph
*g
,rvec
*x
,
1782 struct gbtmpnbls
*nls
)
1784 int ind
,j
,ai
,aj
,shift
,found
;
1790 for(ind
=0; ind
<il
->nr
; ind
+=3)
1792 ai
= il
->iatoms
[ind
+1];
1793 aj
= il
->iatoms
[ind
+2];
1798 rvec_sub(x
[ai
],x
[aj
],dx
);
1799 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
1800 shift
= IVEC2IS(dt
);
1804 shift
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
1807 /* Find the list for this shift or create one */
1808 list
= find_gbtmplist(&nls
[ai
],shift
);
1812 /* So that we do not add the same bond twice.
1813 * This happens with some constraints between 1-3 atoms
1814 * that are in the bond-list but should not be in the GB nb-list */
1815 for(j
=0;j
<list
->naj
;j
++)
1817 if (list
->aj
[j
] == aj
)
1827 gmx_incons("ai == aj");
1830 add_j_to_gblist(list
,aj
);
1836 compare_int (const void * a
, const void * b
)
1838 return ( *(int*)a
- *(int*)b
);
1843 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
, real gbcut
,
1844 rvec x
[], matrix box
,
1845 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1847 int i
,l
,ii
,j
,k
,n
,nj0
,nj1
,ai
,aj
,at0
,at1
,found
,shift
,s
;
1852 struct gbtmpnbls
*nls
;
1853 gbtmpnbl_t
*list
=NULL
;
1855 nls
= born
->nblist_work
;
1857 for(i
=0;i
<born
->nr
;i
++)
1864 set_pbc_dd(&pbc
,fr
->ePBC
,cr
->dd
,TRUE
,box
);
1867 switch (gb_algorithm
)
1871 /* Loop over 1-2, 1-3 and 1-4 interactions */
1872 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1874 add_bondeds_to_gblist(&idef
->il
[j
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1878 /* Loop over 1-4 interactions */
1879 add_bondeds_to_gblist(&idef
->il
[F_GB14
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1882 gmx_incons("Unknown GB algorithm");
1885 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1886 for(n
=0; (n
<fr
->nnblists
); n
++)
1888 for(i
=0; (i
<eNL_NR
); i
++)
1890 nblist
=&(fr
->nblists
[n
].nlist_sr
[i
]);
1892 if (nblist
->nri
> 0 && (i
==eNL_VDWQQ
|| i
==eNL_QQ
))
1894 for(j
=0;j
<nblist
->nri
;j
++)
1896 ai
= nblist
->iinr
[j
];
1897 shift
= nblist
->shift
[j
];
1899 /* Find the list for this shift or create one */
1900 list
= find_gbtmplist(&nls
[ai
],shift
);
1902 nj0
= nblist
->jindex
[j
];
1903 nj1
= nblist
->jindex
[j
+1];
1905 /* Add all the j-atoms in the non-bonded list to the GB list */
1906 for(k
=nj0
;k
<nj1
;k
++)
1908 add_j_to_gblist(list
,nblist
->jjnr
[k
]);
1915 /* Zero out some counters */
1919 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1921 for(i
=0;i
<fr
->natoms_force
;i
++)
1923 for(s
=0; s
<nls
[i
].nlist
; s
++)
1925 list
= &nls
[i
].list
[s
];
1927 /* Only add those atoms that actually have neighbours */
1928 if (born
->use
[i
] != 0)
1930 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1931 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1934 for(k
=0; k
<list
->naj
; k
++)
1936 /* Memory allocation for jjnr */
1937 if(fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1939 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1943 fprintf(debug
,"Increasing GB neighbourlist j size to %d\n",fr
->gblist
.maxnrj
);
1946 srenew(fr
->gblist
.jjnr
,fr
->gblist
.maxnrj
);
1950 if(i
== list
->aj
[k
])
1952 gmx_incons("i == list->aj[k]");
1954 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1957 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
1964 for(i
=0;i
<fr
->gblist
.nri
;i
++)
1966 nj0
= fr
->gblist
.jindex
[i
];
1967 nj1
= fr
->gblist
.jindex
[i
+1];
1968 ai
= fr
->gblist
.iinr
[i
];
1971 for(j
=nj0
;j
<nj1
;j
++)
1973 if(fr
->gblist
.jjnr
[j
]<ai
)
1974 fr
->gblist
.jjnr
[j
]+=fr
->natoms_force
;
1976 qsort(fr
->gblist
.jjnr
+nj0
,nj1
-nj0
,sizeof(int),compare_int
);
1978 for(j
=nj0
;j
<nj1
;j
++)
1980 if(fr
->gblist
.jjnr
[j
]>=fr
->natoms_force
)
1981 fr
->gblist
.jjnr
[j
]-=fr
->natoms_force
;
1990 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
1993 gmx_domdec_t
*dd
=NULL
;
1995 if(DOMAINDECOMP(cr
))
2003 /* Single node or particle decomp (global==local), just copy pointers and return */
2004 if(gb_algorithm
==egbSTILL
)
2006 born
->gpol
= born
->gpol_globalindex
;
2007 born
->vsolv
= born
->vsolv_globalindex
;
2008 born
->gb_radius
= born
->gb_radius_globalindex
;
2012 born
->param
= born
->param_globalindex
;
2013 born
->gb_radius
= born
->gb_radius_globalindex
;
2016 born
->use
= born
->use_globalindex
;
2021 /* Reallocation of local arrays if necessary */
2022 if(born
->nlocal
< dd
->nat_tot
)
2024 born
->nlocal
= dd
->nat_tot
;
2026 /* Arrays specific to different gb algorithms */
2027 if(gb_algorithm
==egbSTILL
)
2029 srenew(born
->gpol
, born
->nlocal
+3);
2030 srenew(born
->vsolv
, born
->nlocal
+3);
2031 srenew(born
->gb_radius
, born
->nlocal
+3);
2035 srenew(born
->param
, born
->nlocal
+3);
2036 srenew(born
->gb_radius
, born
->nlocal
+3);
2039 /* All gb-algorithms use the array for vsites exclusions */
2040 srenew(born
->use
, born
->nlocal
+3);
2043 /* With dd, copy algorithm specific arrays */
2044 if(gb_algorithm
==egbSTILL
)
2046 for(i
=at0
;i
<at1
;i
++)
2048 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
2049 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
2050 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2051 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
2056 for(i
=at0
;i
<at1
;i
++)
2058 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
2059 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2060 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];