1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
54 #include "gmx_fatal.h"
55 #include "mtop_util.h"
69 # include "genborn_sse2_double.h"
70 # include "genborn_allvsall_sse2_double.h"
72 # include "genborn_sse2_single.h"
73 # include "genborn_allvsall_sse2_single.h"
74 # endif /* GMX_DOUBLE */
75 #endif /* SSE or AVX present */
77 #include "genborn_allvsall.h"
79 /*#define DISABLE_SSE*/
88 typedef struct gbtmpnbls
{
94 /* This function is exactly the same as the one in bondfree.c. The reason
95 * it is copied here is that the bonded gb-interactions are evaluated
96 * not in calc_bonds, but rather in calc_gb_forces
98 static int pbc_rvec_sub(const t_pbc
*pbc
,const rvec xi
,const rvec xj
,rvec dx
)
101 return pbc_dx_aiuc(pbc
,xi
,xj
,dx
);
109 int init_gb_nblist(int natoms
, t_nblist
*nl
)
111 nl
->maxnri
= natoms
*4;
121 /*nl->nltype = nltype;*/
123 srenew(nl
->iinr
, nl
->maxnri
);
124 srenew(nl
->gid
, nl
->maxnri
);
125 srenew(nl
->shift
, nl
->maxnri
);
126 srenew(nl
->jindex
, nl
->maxnri
+1);
133 void gb_pd_send(t_commrec
*cr
, real
*send_data
, int nr
)
137 int *index
,*sendc
,*disp
;
139 snew(sendc
,cr
->nnodes
);
140 snew(disp
,cr
->nnodes
);
142 index
= pd_index(cr
);
145 /* Setup count/index arrays */
146 for(i
=0;i
<cr
->nnodes
;i
++)
148 sendc
[i
] = index
[i
+1]-index
[i
];
152 /* Do communication */
153 MPI_Gatherv(send_data
+index
[cur
],sendc
[cur
],GMX_MPI_REAL
,send_data
,sendc
,
154 disp
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
155 MPI_Bcast(send_data
,nr
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
161 int init_gb_plist(t_params
*p_list
)
164 p_list
->param
= NULL
;
171 int init_gb_still(const t_commrec
*cr
, t_forcerec
*fr
,
172 const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
173 gmx_genborn_t
*born
,int natoms
)
176 int i
,j
,i1
,i2
,k
,m
,nbond
,nang
,ia
,ib
,ic
,id
,nb
,idx
,idx2
,at
;
180 real r
,ri
,rj
,ri2
,ri3
,rj2
,r2
,r3
,r4
,rk
,ratio
,term
,h
,doffset
;
181 real p1
,p2
,p3
,factor
,cosine
,rab
,rbc
;
188 snew(born
->gpol_still_work
,natoms
+3);
194 pd_at_range(cr
,&at0
,&at1
);
196 for(i
=0;i
<natoms
;i
++)
213 doffset
= born
->gb_doffset
;
215 for(i
=0;i
<natoms
;i
++)
217 born
->gpol_globalindex
[i
]=born
->vsolv_globalindex
[i
]=
218 born
->gb_radius_globalindex
[i
]=0;
221 /* Compute atomic solvation volumes for Still method */
222 for(i
=0;i
<natoms
;i
++)
224 ri
=atype
->gb_radius
[atoms
->atom
[i
].type
];
225 born
->gb_radius_globalindex
[i
] = ri
;
227 born
->vsolv_globalindex
[i
]=(4*M_PI
/3)*r3
;
230 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
232 m
=idef
->il
[F_GB12
].iatoms
[j
];
233 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
234 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
236 r
=1.01*idef
->iparams
[m
].gb
.st
;
238 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
239 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
245 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
247 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
255 born
->vsolv_globalindex
[ia
] -= term
;
258 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
260 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
268 born
->vsolv_globalindex
[ib
] -= term
;
274 gmx_sum(natoms
,vsol
,cr
);
276 for(i
=0;i
<natoms
;i
++)
278 born
->vsolv_globalindex
[i
]=born
->vsolv_globalindex
[i
]-vsol
[i
];
282 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
285 for(j
=0;j
<natoms
;j
++)
287 if(born
->use_globalindex
[j
]==1)
289 born
->gpol_globalindex
[j
]=-0.5*ONE_4PI_EPS0
/
290 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
295 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
297 m
=idef
->il
[F_GB12
].iatoms
[j
];
298 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
299 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
301 r
=idef
->iparams
[m
].gb
.st
;
307 gp
[ia
]+=STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
308 gp
[ib
]+=STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
312 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
313 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
314 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
315 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
320 for(j
=0;j
<idef
->il
[F_GB13
].nr
;j
+=3)
322 m
=idef
->il
[F_GB13
].iatoms
[j
];
323 ia
=idef
->il
[F_GB13
].iatoms
[j
+1];
324 ib
=idef
->il
[F_GB13
].iatoms
[j
+2];
326 r
=idef
->iparams
[m
].gb
.st
;
331 gp
[ia
]+=STILL_P3
*born
->vsolv
[ib
]/r4
;
332 gp
[ib
]+=STILL_P3
*born
->vsolv
[ia
]/r4
;
336 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
337 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
338 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
339 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
345 gmx_sum(natoms
,gp
,cr
);
347 for(i
=0;i
<natoms
;i
++)
349 born
->gpol_globalindex
[i
]=born
->gpol_globalindex
[i
]+gp
[i
];
359 /* Initialize all GB datastructs and compute polarization energies */
360 int init_gb(gmx_genborn_t
**p_born
,
361 const t_commrec
*cr
, t_forcerec
*fr
, const t_inputrec
*ir
,
362 const gmx_mtop_t
*mtop
, real rgbradii
, int gb_algorithm
)
364 int i
,j
,m
,ai
,aj
,jj
,natoms
,nalloc
;
365 real rai
,sk
,p
,doffset
;
369 gmx_localtop_t
*localtop
;
371 natoms
= mtop
->natoms
;
373 atoms
= gmx_mtop_global_atoms(mtop
);
374 localtop
= gmx_mtop_generate_local_top(mtop
,ir
);
381 snew(born
->drobc
, natoms
);
382 snew(born
->bRad
, natoms
);
384 /* Allocate memory for the global data arrays */
385 snew(born
->param_globalindex
, natoms
+3);
386 snew(born
->gpol_globalindex
, natoms
+3);
387 snew(born
->vsolv_globalindex
, natoms
+3);
388 snew(born
->gb_radius_globalindex
, natoms
+3);
389 snew(born
->use_globalindex
, natoms
+3);
391 snew(fr
->invsqrta
, natoms
);
392 snew(fr
->dvda
, natoms
);
395 fr
->dadx_rawptr
= NULL
;
397 born
->gpol_still_work
= NULL
;
398 born
->gpol_hct_work
= NULL
;
400 /* snew(born->asurf,natoms); */
401 /* snew(born->dasurf,natoms); */
403 /* Initialize the gb neighbourlist */
404 init_gb_nblist(natoms
,&(fr
->gblist
));
406 /* Do the Vsites exclusions (if any) */
407 for(i
=0;i
<natoms
;i
++)
409 jj
= atoms
.atom
[i
].type
;
410 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
412 born
->use_globalindex
[i
] = 1;
416 born
->use_globalindex
[i
] = 0;
419 /* If we have a Vsite, put vs_globalindex[i]=0 */
420 if (C6 (fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
421 C12(fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
422 atoms
.atom
[i
].q
== 0)
424 born
->use_globalindex
[i
]=0;
428 /* Copy algorithm parameters from inputrecord to local structure */
429 born
->obc_alpha
= ir
->gb_obc_alpha
;
430 born
->obc_beta
= ir
->gb_obc_beta
;
431 born
->obc_gamma
= ir
->gb_obc_gamma
;
432 born
->gb_doffset
= ir
->gb_dielectric_offset
;
433 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
434 born
->epsilon_r
= ir
->epsilon_r
;
436 doffset
= born
->gb_doffset
;
438 /* Set the surface tension */
439 born
->sa_surface_tension
= ir
->sa_surface_tension
;
441 /* If Still model, initialise the polarisation energies */
442 if(gb_algorithm
==egbSTILL
)
444 init_gb_still(cr
, fr
,&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
449 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
450 else if(gb_algorithm
==egbHCT
|| gb_algorithm
==egbOBC
)
453 snew(born
->gpol_hct_work
, natoms
+3);
455 for(i
=0;i
<natoms
;i
++)
457 if(born
->use_globalindex
[i
]==1)
459 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
460 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
461 born
->param_globalindex
[i
] = sk
;
462 born
->gb_radius_globalindex
[i
] = rai
;
466 born
->param_globalindex
[i
] = 0;
467 born
->gb_radius_globalindex
[i
] = 0;
472 /* Allocate memory for work arrays for temporary use */
473 snew(born
->work
,natoms
+4);
474 snew(born
->count
,natoms
);
475 snew(born
->nblist_work
,natoms
);
477 /* Domain decomposition specific stuff */
486 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
487 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
488 gmx_genborn_t
*born
,t_mdatoms
*md
)
490 int i
,k
,n
,nj0
,nj1
,ai
,aj
,type
;
493 real gpi
,dr
,dr2
,dr4
,idr4
,rvdw
,ratio
,ccf
,theta
,term
,rai
,raj
;
494 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
495 real rinv
,idr2
,idr6
,vaj
,dccf
,cosq
,sinq
,prod
,gpi2
;
497 real vai
, prod_ai
, icf4
,icf6
;
499 factor
= 0.5*ONE_4PI_EPS0
;
502 for(i
=0;i
<born
->nr
;i
++)
504 born
->gpol_still_work
[i
]=0;
507 for(i
=0;i
<nl
->nri
;i
++ )
512 nj1
= nl
->jindex
[i
+1];
514 /* Load shifts for this list */
515 shift
= nl
->shift
[i
];
516 shX
= fr
->shift_vec
[shift
][0];
517 shY
= fr
->shift_vec
[shift
][1];
518 shZ
= fr
->shift_vec
[shift
][2];
522 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
523 vai
= born
->vsolv
[ai
];
524 prod_ai
= STILL_P4
*vai
;
526 /* Load atom i coordinates, add shift vectors */
527 ix1
= shX
+ x
[ai
][0];
528 iy1
= shY
+ x
[ai
][1];
529 iz1
= shZ
+ x
[ai
][2];
542 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
543 rinv
= gmx_invsqrt(dr2
);
548 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
552 ratio
= dr2
/ (rvdw
* rvdw
);
553 vaj
= born
->vsolv
[aj
];
555 if(ratio
>STILL_P5INV
)
562 theta
= ratio
*STILL_PIP5
;
564 term
= 0.5*(1.0-cosq
);
566 sinq
= 1.0 - cosq
*cosq
;
567 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
572 icf6
= (4*ccf
-dccf
)*idr6
;
574 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
577 /* Save ai->aj and aj->ai chain rule terms */
578 fr
->dadx
[n
++] = prod
*icf6
;
579 fr
->dadx
[n
++] = prod_ai
*icf6
;
581 born
->gpol_still_work
[ai
]+=gpi
;
584 /* Parallel summations */
587 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
589 else if(DOMAINDECOMP(cr
))
591 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
594 /* Calculate the radii */
595 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
597 if(born
->use
[i
] != 0)
600 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
602 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
603 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
607 /* Extra communication required for DD */
610 dd_atom_spread_real(cr
->dd
, born
->bRad
);
611 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
620 calc_gb_rad_hct(t_commrec
*cr
,t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
621 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
622 gmx_genborn_t
*born
,t_mdatoms
*md
)
624 int i
,k
,n
,ai
,aj
,nj0
,nj1
,at0
,at1
;
627 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk_ai
,sk2
,sk2_ai
,lij
,uij
,diff2
,tmp
,sum_ai
;
628 real rad
,min_rad
,rinv
,rai_inv
;
629 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
630 real lij2
, uij2
, lij3
, uij3
, t1
,t2
,t3
;
631 real lij_inv
,dlij
,duij
,sk2_rinv
,prod
,log_term
;
632 real doffset
,raj_inv
,dadx_val
;
635 doffset
= born
->gb_doffset
;
636 gb_radius
= born
->gb_radius
;
638 for(i
=0;i
<born
->nr
;i
++)
640 born
->gpol_hct_work
[i
] = 0;
643 /* Keep the compiler happy */
647 for(i
=0;i
<nl
->nri
;i
++)
652 nj1
= nl
->jindex
[i
+1];
654 /* Load shifts for this list */
655 shift
= nl
->shift
[i
];
656 shX
= fr
->shift_vec
[shift
][0];
657 shY
= fr
->shift_vec
[shift
][1];
658 shZ
= fr
->shift_vec
[shift
][2];
663 sk_ai
= born
->param
[ai
];
664 sk2_ai
= sk_ai
*sk_ai
;
666 /* Load atom i coordinates, add shift vectors */
667 ix1
= shX
+ x
[ai
][0];
668 iy1
= shY
+ x
[ai
][1];
669 iz1
= shZ
+ x
[ai
][2];
685 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
686 rinv
= gmx_invsqrt(dr2
);
689 sk
= born
->param
[aj
];
692 /* aj -> ai interaction */
713 lij_inv
= gmx_invsqrt(lij2
);
716 prod
= 0.25*sk2_rinv
;
718 log_term
= log(uij
*lij_inv
);
720 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
725 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
728 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
729 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
730 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
732 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
733 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
734 /* rb2 is moved to chainrule */
742 fr
->dadx
[n
++] = dadx_val
;
745 /* ai -> aj interaction */
748 lij
= 1.0/(dr
-sk_ai
);
761 uij
= 1.0/(dr
+sk_ai
);
767 lij_inv
= gmx_invsqrt(lij2
);
768 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
770 prod
= 0.25 * sk2_rinv
;
772 /* log_term = table_log(uij*lij_inv,born->log_table,
773 LOG_TABLE_ACCURACY); */
774 log_term
= log(uij
*lij_inv
);
776 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
781 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
785 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
786 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
787 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
789 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
790 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
792 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
798 fr
->dadx
[n
++] = dadx_val
;
801 born
->gpol_hct_work
[ai
] += sum_ai
;
804 /* Parallel summations */
807 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
809 else if(DOMAINDECOMP(cr
))
811 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
814 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
816 if(born
->use
[i
] != 0)
818 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
819 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
820 min_rad
= rai
+ doffset
;
823 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
824 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
828 /* Extra communication required for DD */
831 dd_atom_spread_real(cr
->dd
, born
->bRad
);
832 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
840 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_localtop_t
*top
,
841 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
)
843 int i
,k
,ai
,aj
,nj0
,nj1
,n
,at0
,at1
;
846 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk2
,lij
,uij
,diff2
,tmp
,sum_ai
;
847 real rad
, min_rad
,sum_ai2
,sum_ai3
,tsum
,tchain
,rinv
,rai_inv
,lij_inv
,rai_inv2
;
848 real log_term
,prod
,sk2_rinv
,sk_ai
,sk2_ai
;
849 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
850 real lij2
,uij2
,lij3
,uij3
,dlij
,duij
,t1
,t2
,t3
;
851 real doffset
,raj_inv
,dadx_val
;
854 /* Keep the compiler happy */
859 doffset
= born
->gb_doffset
;
860 gb_radius
= born
->gb_radius
;
862 for(i
=0;i
<born
->nr
;i
++)
864 born
->gpol_hct_work
[i
] = 0;
867 for(i
=0;i
<nl
->nri
;i
++)
872 nj1
= nl
->jindex
[i
+1];
874 /* Load shifts for this list */
875 shift
= nl
->shift
[i
];
876 shX
= fr
->shift_vec
[shift
][0];
877 shY
= fr
->shift_vec
[shift
][1];
878 shZ
= fr
->shift_vec
[shift
][2];
883 sk_ai
= born
->param
[ai
];
884 sk2_ai
= sk_ai
*sk_ai
;
886 /* Load atom i coordinates, add shift vectors */
887 ix1
= shX
+ x
[ai
][0];
888 iy1
= shY
+ x
[ai
][1];
889 iz1
= shZ
+ x
[ai
][2];
905 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
906 rinv
= gmx_invsqrt(dr2
);
909 /* sk is precalculated in init_gb() */
910 sk
= born
->param
[aj
];
913 /* aj -> ai interaction */
933 lij_inv
= gmx_invsqrt(lij2
);
936 prod
= 0.25*sk2_rinv
;
938 log_term
= log(uij
*lij_inv
);
940 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
944 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
948 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
949 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
950 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
952 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
960 fr
->dadx
[n
++] = dadx_val
;
962 /* ai -> aj interaction */
965 lij
= 1.0/(dr
-sk_ai
);
978 uij
= 1.0/(dr
+sk_ai
);
984 lij_inv
= gmx_invsqrt(lij2
);
985 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
987 prod
= 0.25 * sk2_rinv
;
989 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
990 log_term
= log(uij
*lij_inv
);
992 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
996 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
999 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1000 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1001 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1003 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1005 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
1012 fr
->dadx
[n
++] = dadx_val
;
1015 born
->gpol_hct_work
[ai
] += sum_ai
;
1019 /* Parallel summations */
1022 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
1024 else if(DOMAINDECOMP(cr
))
1026 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
1029 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
1031 if(born
->use
[i
] != 0)
1033 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
1037 sum_ai
= rai
* born
->gpol_hct_work
[i
];
1038 sum_ai2
= sum_ai
* sum_ai
;
1039 sum_ai3
= sum_ai2
* sum_ai
;
1041 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
1042 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
1043 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
1045 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
1047 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
1048 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
1052 /* Extra (local) communication required for DD */
1053 if(DOMAINDECOMP(cr
))
1055 dd_atom_spread_real(cr
->dd
, born
->bRad
);
1056 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
1057 dd_atom_spread_real(cr
->dd
, born
->drobc
);
1066 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
,gmx_localtop_t
*top
,
1067 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
,t_nrnb
*nrnb
)
1073 if(fr
->bAllvsAll
&& fr
->dadx
==NULL
)
1075 /* We might need up to 8 atoms of padding before and after,
1076 * and another 4 units to guarantee SSE alignment.
1078 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
1079 snew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1080 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1084 /* In the SSE-enabled gb-loops, when writing to dadx, we
1085 * always write 2*4 elements at a time, even in the case with only
1086 * 1-3 j particles, where we only really need to write 2*(1-3)
1087 * elements. This is because we want dadx to be aligned to a 16-
1088 * byte boundary, and being able to use _mm_store/load_ps
1090 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
1092 /* First, reallocate the dadx array, we need 3 extra for SSE */
1093 if (ndadx
+ 3 > fr
->nalloc_dadx
)
1095 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
1096 srenew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1097 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1103 cnt
= md
->homenr
*(md
->nr
/2+1);
1105 if(ir
->gb_algorithm
==egbSTILL
)
1107 #if 0 && defined (GMX_X86_SSE2)
1108 if(fr
->use_acceleration
)
1111 genborn_allvsall_calc_still_radii_sse2_double(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1113 genborn_allvsall_calc_still_radii_sse2_single(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1118 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1121 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1123 /* 13 flops in outer loop, 47 flops in inner loop */
1124 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_STILL
,md
->homenr
*13+cnt
*47);
1126 else if(ir
->gb_algorithm
==egbHCT
|| ir
->gb_algorithm
==egbOBC
)
1128 #if 0 && defined (GMX_X86_SSE2)
1129 if(fr
->use_acceleration
)
1132 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1134 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1139 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1142 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1144 /* 24 flops in outer loop, 183 in inner */
1145 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_HCT_OBC
,md
->homenr
*24+cnt
*183);
1149 gmx_fatal(FARGS
,"Bad gb algorithm for all-vs-all interactions");
1154 /* Switch for determining which algorithm to use for Born radii calculation */
1157 #if 0 && defined (GMX_X86_SSE2)
1158 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1159 switch(ir
->gb_algorithm
)
1162 if(fr
->use_acceleration
)
1164 calc_gb_rad_still_sse2_double(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
);
1168 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1172 if(fr
->use_acceleration
)
1174 calc_gb_rad_hct_obc_sse2_double(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1178 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1182 if(fr
->use_acceleration
)
1184 calc_gb_rad_hct_obc_sse2_double(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1188 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1193 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1196 switch(ir
->gb_algorithm
)
1199 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1202 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1205 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1209 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1216 #if 0 && defined (GMX_X86_SSE2)
1217 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1218 switch(ir
->gb_algorithm
)
1221 if(fr
->use_acceleration
)
1223 calc_gb_rad_still_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
);
1227 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1231 if(fr
->use_acceleration
)
1233 calc_gb_rad_hct_obc_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1237 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1242 if(fr
->use_acceleration
)
1244 calc_gb_rad_hct_obc_sse2_single(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1248 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1253 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1257 switch(ir
->gb_algorithm
)
1260 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1263 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1266 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1270 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1273 #endif /* Single precision sse */
1275 #endif /* Double or single precision */
1277 if(fr
->bAllvsAll
==FALSE
)
1279 switch(ir
->gb_algorithm
)
1282 /* 17 flops per outer loop iteration, 47 flops per inner loop */
1283 inc_nrnb(nrnb
,eNR_BORN_RADII_STILL
,nl
->nri
*17+nl
->nrj
*47);
1287 /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
1288 inc_nrnb(nrnb
,eNR_BORN_RADII_HCT_OBC
,nl
->nri
*61+nl
->nrj
*183);
1301 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1302 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
, real epsilon_r
,
1303 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1305 int i
,j
,n0
,m
,nnn
,type
,ai
,aj
;
1311 real isaprod
,qq
,gbscale
,gbtabscale
,Y
,F
,Geps
,Heps2
,Fp
,VV
,FF
,rt
,eps
,eps2
;
1312 real vgb
,fgb
,vcoul
,fijC
,dvdatmp
,fscal
,dvdaj
;
1318 t_iatom
*forceatoms
;
1320 /* Scale the electrostatics by gb_epsilon_solvent */
1321 facel
= facel
* ((1.0/epsilon_r
) - 1.0/gb_epsilon_solvent
);
1323 gbtabscale
=*p_gbtabscale
;
1326 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1328 forceatoms
= idef
->il
[j
].iatoms
;
1330 for(i
=0;i
<idef
->il
[j
].nr
; )
1332 /* To avoid reading in the interaction type, we just increment i to pass over
1333 * the types in the forceatoms array, this saves some memory accesses
1336 ai
= forceatoms
[i
++];
1337 aj
= forceatoms
[i
++];
1339 ki
= pbc_rvec_sub(pbc
,x
[ai
],x
[aj
],dx
);
1340 rsq11
= iprod(dx
,dx
);
1342 isai
= invsqrta
[ai
];
1343 iq
= (-1)*facel
*charge
[ai
];
1345 rinv11
= gmx_invsqrt(rsq11
);
1346 isaj
= invsqrta
[aj
];
1347 isaprod
= isai
*isaj
;
1348 qq
= isaprod
*iq
*charge
[aj
];
1349 gbscale
= isaprod
*gbtabscale
;
1358 Geps
= eps
*GBtab
[nnn
+2];
1359 Heps2
= eps2
*GBtab
[nnn
+3];
1362 FF
= Fp
+Geps
+2.0*Heps2
;
1364 fijC
= qq
*FF
*gbscale
;
1365 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1366 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1367 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1368 vctot
= vctot
+ vgb
;
1369 fgb
= -(fijC
)*rinv11
;
1372 ivec_sub(SHIFT_IVEC(graph
,ai
),SHIFT_IVEC(graph
,aj
),dt
);
1376 for (m
=0; (m
<DIM
); m
++) { /* 15 */
1380 fshift
[ki
][m
]+=fscal
;
1381 fshift
[CENTRAL
][m
]-=fscal
;
1389 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1390 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, t_mdatoms
*md
, double facel
)
1393 real rai
,e
,derb
,q
,q2
,fi
,rai_inv
,vtot
;
1397 pd_at_range(cr
,&at0
,&at1
);
1399 else if(DOMAINDECOMP(cr
))
1402 at1
=cr
->dd
->nat_home
;
1411 /* Scale the electrostatics by gb_epsilon_solvent */
1412 facel
= facel
* ((1.0/born
->epsilon_r
) - 1.0/born
->gb_epsilon_solvent
);
1416 /* Apply self corrections */
1417 for(i
=at0
;i
<at1
;i
++)
1421 if(born
->use
[ai
]==1)
1423 rai
= born
->bRad
[ai
];
1429 derb
= 0.5*e
*rai_inv
*rai_inv
;
1430 dvda
[ai
] += derb
*rai
;
1439 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
,int natoms
,gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1440 const t_atomtypes
*atype
, real
*dvda
,int gb_algorithm
, t_mdatoms
*md
)
1443 real e
,es
,rai
,rbi
,term
,probe
,tmp
,factor
;
1444 real rbi_inv
,rbi_inv2
;
1446 /* To keep the compiler happy */
1451 pd_at_range(cr
,&at0
,&at1
);
1453 else if(DOMAINDECOMP(cr
))
1456 at1
= cr
->dd
->nat_home
;
1464 /* factor is the surface tension */
1465 factor
= born
->sa_surface_tension
;
1468 // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1469 if(gb_algorithm==egbSTILL)
1471 factor=0.0049*100*CAL2JOULE;
1475 factor=0.0054*100*CAL2JOULE;
1478 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1484 for(i
=at0
;i
<at1
;i
++)
1488 if(born
->use
[ai
]==1)
1490 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1491 rbi_inv
= fr
->invsqrta
[ai
];
1492 rbi_inv2
= rbi_inv
* rbi_inv
;
1493 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1495 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1496 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1506 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1507 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
, t_mdatoms
*md
)
1509 int i
,k
,n
,ai
,aj
,nj0
,nj1
,n0
,n1
;
1512 real fgb
,fij
,rb2
,rbi
,fix1
,fiy1
,fiz1
;
1513 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
,rsq11
;
1514 real rinv11
,tx
,ty
,tz
,rbai
,rbaj
,fgb_ai
;
1524 if(gb_algorithm
==egbSTILL
)
1528 rbi
= born
->bRad
[i
];
1529 rb
[i
] = (2 * rbi
* rbi
* dvda
[i
])/ONE_4PI_EPS0
;
1532 else if(gb_algorithm
==egbHCT
)
1536 rbi
= born
->bRad
[i
];
1537 rb
[i
] = rbi
* rbi
* dvda
[i
];
1540 else if(gb_algorithm
==egbOBC
)
1544 rbi
= born
->bRad
[i
];
1545 rb
[i
] = rbi
* rbi
* born
->drobc
[i
] * dvda
[i
];
1549 for(i
=0;i
<nl
->nri
;i
++)
1553 nj0
= nl
->jindex
[i
];
1554 nj1
= nl
->jindex
[i
+1];
1556 /* Load shifts for this list */
1557 shift
= nl
->shift
[i
];
1558 shX
= shift_vec
[shift
][0];
1559 shY
= shift_vec
[shift
][1];
1560 shZ
= shift_vec
[shift
][2];
1562 /* Load atom i coordinates, add shift vectors */
1563 ix1
= shX
+ x
[ai
][0];
1564 iy1
= shY
+ x
[ai
][1];
1565 iz1
= shZ
+ x
[ai
][2];
1573 for(k
=nj0
;k
<nj1
;k
++)
1587 fgb
= rbai
*dadx
[n
++];
1588 fgb_ai
= rbaj
*dadx
[n
++];
1590 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1601 /* Update force on atom aj */
1602 t
[aj
][0] = t
[aj
][0] - tx
;
1603 t
[aj
][1] = t
[aj
][1] - ty
;
1604 t
[aj
][2] = t
[aj
][2] - tz
;
1607 /* Update force and shift forces on atom ai */
1608 t
[ai
][0] = t
[ai
][0] + fix1
;
1609 t
[ai
][1] = t
[ai
][1] + fiy1
;
1610 t
[ai
][2] = t
[ai
][2] + fiz1
;
1612 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1613 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1614 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1623 calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
, const t_atomtypes
*atype
,
1624 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, int sa_algorithm
, t_nrnb
*nrnb
, gmx_bool bRad
,
1625 const t_pbc
*pbc
, const t_graph
*graph
, gmx_enerdata_t
*enerd
)
1632 const t_pbc
*pbc_null
;
1639 if(sa_algorithm
== esaAPPROX
)
1641 /* Do a simple ACE type approximation for the non-polar solvation */
1642 enerd
->term
[F_NPSOLVATION
] += calc_gb_nonpolar(cr
, fr
,born
->nr
, born
, top
, atype
, fr
->dvda
, gb_algorithm
,md
);
1645 /* Calculate the bonded GB-interactions using either table or analytical formula */
1646 enerd
->term
[F_GBPOL
] += gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1647 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.data
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1649 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1650 enerd
->term
[F_GBPOL
] += calc_gb_selfcorrections(cr
,born
->nr
,md
->chargeA
, born
, fr
->dvda
, md
, fr
->epsfac
);
1652 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1655 gmx_sum(md
->nr
,fr
->dvda
, cr
);
1657 else if(DOMAINDECOMP(cr
))
1659 dd_atom_sum_real(cr
->dd
,fr
->dvda
);
1660 dd_atom_spread_real(cr
->dd
,fr
->dvda
);
1665 #if 0 && defined (GMX_X86_SSE2)
1666 if(fr
->use_acceleration
)
1669 genborn_allvsall_calc_chainrule_sse2_double(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1671 genborn_allvsall_calc_chainrule_sse2_single(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1676 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1679 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1681 cnt
= md
->homenr
*(md
->nr
/2+1);
1682 /* 9 flops for outer loop, 15 for inner */
1683 inc_nrnb(nrnb
,eNR_BORN_AVA_CHAINRULE
,md
->homenr
*9+cnt
*15);
1687 #if 0 && defined (GMX_X86_SSE2)
1688 if(fr
->use_acceleration
)
1691 calc_gb_chainrule_sse2_double(fr
->natoms_force
, &(fr
->gblist
),fr
->dadx
,fr
->dvda
,x
[0],
1692 f
[0],fr
->fshift
[0],fr
->shift_vec
[0],gb_algorithm
,born
,md
);
1694 calc_gb_chainrule_sse2_single(fr
->natoms_force
, &(fr
->gblist
),fr
->dadx
,fr
->dvda
,x
[0],
1695 f
[0],fr
->fshift
[0],fr
->shift_vec
[0],gb_algorithm
,born
,md
);
1700 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1701 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1704 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1705 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1710 /* 9 flops for outer loop, 15 for inner */
1711 inc_nrnb(nrnb
,eNR_BORN_CHAINRULE
,fr
->gblist
.nri
*9+fr
->gblist
.nrj
*15);
1715 static void add_j_to_gblist(gbtmpnbl_t
*list
,int aj
)
1717 if (list
->naj
>= list
->aj_nalloc
)
1719 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1720 srenew(list
->aj
,list
->aj_nalloc
);
1723 list
->aj
[list
->naj
++] = aj
;
1726 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
,int shift
)
1730 /* Search the list with the same shift, if there is one */
1732 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1736 if (ind
== lists
->nlist
)
1738 if (lists
->nlist
== lists
->list_nalloc
)
1740 lists
->list_nalloc
++;
1741 srenew(lists
->list
,lists
->list_nalloc
);
1742 for(i
=lists
->nlist
; i
<lists
->list_nalloc
; i
++)
1744 lists
->list
[i
].aj
= NULL
;
1745 lists
->list
[i
].aj_nalloc
= 0;
1750 lists
->list
[lists
->nlist
].shift
= shift
;
1751 lists
->list
[lists
->nlist
].naj
= 0;
1755 return &lists
->list
[ind
];
1758 static void add_bondeds_to_gblist(t_ilist
*il
,
1759 gmx_bool bMolPBC
,t_pbc
*pbc
,t_graph
*g
,rvec
*x
,
1760 struct gbtmpnbls
*nls
)
1762 int ind
,j
,ai
,aj
,shift
,found
;
1768 for(ind
=0; ind
<il
->nr
; ind
+=3)
1770 ai
= il
->iatoms
[ind
+1];
1771 aj
= il
->iatoms
[ind
+2];
1776 rvec_sub(x
[ai
],x
[aj
],dx
);
1777 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
1778 shift
= IVEC2IS(dt
);
1782 shift
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
1785 /* Find the list for this shift or create one */
1786 list
= find_gbtmplist(&nls
[ai
],shift
);
1790 /* So that we do not add the same bond twice.
1791 * This happens with some constraints between 1-3 atoms
1792 * that are in the bond-list but should not be in the GB nb-list */
1793 for(j
=0;j
<list
->naj
;j
++)
1795 if (list
->aj
[j
] == aj
)
1805 gmx_incons("ai == aj");
1808 add_j_to_gblist(list
,aj
);
1814 compare_int (const void * a
, const void * b
)
1816 return ( *(int*)a
- *(int*)b
);
1821 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
, real gbcut
,
1822 rvec x
[], matrix box
,
1823 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1825 int i
,l
,ii
,j
,k
,n
,nj0
,nj1
,ai
,aj
,at0
,at1
,found
,shift
,s
;
1830 struct gbtmpnbls
*nls
;
1831 gbtmpnbl_t
*list
=NULL
;
1833 set_pbc(&pbc
,fr
->ePBC
,box
);
1834 nls
= born
->nblist_work
;
1836 for(i
=0;i
<born
->nr
;i
++)
1843 set_pbc_dd(&pbc
,fr
->ePBC
,cr
->dd
,TRUE
,box
);
1846 switch (gb_algorithm
)
1850 /* Loop over 1-2, 1-3 and 1-4 interactions */
1851 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1853 add_bondeds_to_gblist(&idef
->il
[j
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1857 /* Loop over 1-4 interactions */
1858 add_bondeds_to_gblist(&idef
->il
[F_GB14
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1861 gmx_incons("Unknown GB algorithm");
1864 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1865 for(n
=0; (n
<fr
->nnblists
); n
++)
1867 for(i
=0; (i
<eNL_NR
); i
++)
1869 nblist
=&(fr
->nblists
[n
].nlist_sr
[i
]);
1871 if (nblist
->nri
> 0 && (i
==eNL_VDWQQ
|| i
==eNL_QQ
))
1873 for(j
=0;j
<nblist
->nri
;j
++)
1875 ai
= nblist
->iinr
[j
];
1876 shift
= nblist
->shift
[j
];
1878 /* Find the list for this shift or create one */
1879 list
= find_gbtmplist(&nls
[ai
],shift
);
1881 nj0
= nblist
->jindex
[j
];
1882 nj1
= nblist
->jindex
[j
+1];
1884 /* Add all the j-atoms in the non-bonded list to the GB list */
1885 for(k
=nj0
;k
<nj1
;k
++)
1887 add_j_to_gblist(list
,nblist
->jjnr
[k
]);
1894 /* Zero out some counters */
1898 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1900 for(i
=0;i
<fr
->natoms_force
;i
++)
1902 for(s
=0; s
<nls
[i
].nlist
; s
++)
1904 list
= &nls
[i
].list
[s
];
1906 /* Only add those atoms that actually have neighbours */
1907 if (born
->use
[i
] != 0)
1909 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1910 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1913 for(k
=0; k
<list
->naj
; k
++)
1915 /* Memory allocation for jjnr */
1916 if(fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1918 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1922 fprintf(debug
,"Increasing GB neighbourlist j size to %d\n",fr
->gblist
.maxnrj
);
1925 srenew(fr
->gblist
.jjnr
,fr
->gblist
.maxnrj
);
1929 if(i
== list
->aj
[k
])
1931 gmx_incons("i == list->aj[k]");
1933 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1936 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
1943 for(i
=0;i
<fr
->gblist
.nri
;i
++)
1945 nj0
= fr
->gblist
.jindex
[i
];
1946 nj1
= fr
->gblist
.jindex
[i
+1];
1947 ai
= fr
->gblist
.iinr
[i
];
1950 for(j
=nj0
;j
<nj1
;j
++)
1952 if(fr
->gblist
.jjnr
[j
]<ai
)
1953 fr
->gblist
.jjnr
[j
]+=fr
->natoms_force
;
1955 qsort(fr
->gblist
.jjnr
+nj0
,nj1
-nj0
,sizeof(int),compare_int
);
1957 for(j
=nj0
;j
<nj1
;j
++)
1959 if(fr
->gblist
.jjnr
[j
]>=fr
->natoms_force
)
1960 fr
->gblist
.jjnr
[j
]-=fr
->natoms_force
;
1969 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
1972 gmx_domdec_t
*dd
=NULL
;
1974 if(DOMAINDECOMP(cr
))
1982 /* Single node or particle decomp (global==local), just copy pointers and return */
1983 if(gb_algorithm
==egbSTILL
)
1985 born
->gpol
= born
->gpol_globalindex
;
1986 born
->vsolv
= born
->vsolv_globalindex
;
1987 born
->gb_radius
= born
->gb_radius_globalindex
;
1991 born
->param
= born
->param_globalindex
;
1992 born
->gb_radius
= born
->gb_radius_globalindex
;
1995 born
->use
= born
->use_globalindex
;
2000 /* Reallocation of local arrays if necessary */
2001 /* fr->natoms_force is equal to dd->nat_tot */
2002 if (DOMAINDECOMP(cr
) && dd
->nat_tot
> born
->nalloc
)
2006 nalloc
= dd
->nat_tot
;
2008 /* Arrays specific to different gb algorithms */
2009 if (gb_algorithm
== egbSTILL
)
2011 srenew(born
->gpol
, nalloc
+3);
2012 srenew(born
->vsolv
, nalloc
+3);
2013 srenew(born
->gb_radius
, nalloc
+3);
2014 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2018 born
->gb_radius
[i
] = 0;
2023 srenew(born
->param
, nalloc
+3);
2024 srenew(born
->gb_radius
, nalloc
+3);
2025 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2028 born
->gb_radius
[i
] = 0;
2032 /* All gb-algorithms use the array for vsites exclusions */
2033 srenew(born
->use
, nalloc
+3);
2034 for(i
=born
->nalloc
; (i
<nalloc
+3); i
++)
2039 born
->nalloc
= nalloc
;
2042 /* With dd, copy algorithm specific arrays */
2043 if(gb_algorithm
==egbSTILL
)
2045 for(i
=at0
;i
<at1
;i
++)
2047 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
2048 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
2049 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2050 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
2055 for(i
=at0
;i
<at1
;i
++)
2057 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
2058 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2059 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];