2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2008, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "types/commrec.h"
47 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/math/units.h"
55 #include "gromacs/utility/fatalerror.h"
56 #include "mtop_util.h"
61 #include "gromacs/utility/gmxmpi.h"
63 #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
65 # include "genborn_sse2_double.h"
66 # include "genborn_allvsall_sse2_double.h"
68 # include "genborn_sse2_single.h"
69 # include "genborn_allvsall_sse2_single.h"
70 # endif /* GMX_DOUBLE */
71 #endif /* SSE or AVX present */
73 #include "genborn_allvsall.h"
75 /*#define DISABLE_SSE*/
84 typedef struct gbtmpnbls
{
90 /* This function is exactly the same as the one in bondfree.c. The reason
91 * it is copied here is that the bonded gb-interactions are evaluated
92 * not in calc_bonds, but rather in calc_gb_forces
94 static int pbc_rvec_sub(const t_pbc
*pbc
, const rvec xi
, const rvec xj
, rvec dx
)
98 return pbc_dx_aiuc(pbc
, xi
, xj
, dx
);
102 rvec_sub(xi
, xj
, dx
);
107 static int init_gb_nblist(int natoms
, t_nblist
*nl
)
109 nl
->maxnri
= natoms
*4;
118 /*nl->nltype = nltype;*/
120 srenew(nl
->iinr
, nl
->maxnri
);
121 srenew(nl
->gid
, nl
->maxnri
);
122 srenew(nl
->shift
, nl
->maxnri
);
123 srenew(nl
->jindex
, nl
->maxnri
+1);
131 static int init_gb_still(const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
132 gmx_genborn_t
*born
, int natoms
)
135 int i
, j
, i1
, i2
, k
, m
, nbond
, nang
, ia
, ib
, ic
, id
, nb
, idx
, idx2
, at
;
139 real r
, ri
, rj
, ri2
, ri3
, rj2
, r2
, r3
, r4
, rk
, ratio
, term
, h
, doffset
;
140 real p1
, p2
, p3
, factor
, cosine
, rab
, rbc
;
147 snew(born
->gpol_still_work
, natoms
+3);
152 doffset
= born
->gb_doffset
;
154 for (i
= 0; i
< natoms
; i
++)
156 born
->gpol_globalindex
[i
] = born
->vsolv_globalindex
[i
] =
157 born
->gb_radius_globalindex
[i
] = 0;
160 /* Compute atomic solvation volumes for Still method */
161 for (i
= 0; i
< natoms
; i
++)
163 ri
= atype
->gb_radius
[atoms
->atom
[i
].type
];
164 born
->gb_radius_globalindex
[i
] = ri
;
166 born
->vsolv_globalindex
[i
] = (4*M_PI
/3)*r3
;
169 for (j
= 0; j
< idef
->il
[F_GB12
].nr
; j
+= 3)
171 m
= idef
->il
[F_GB12
].iatoms
[j
];
172 ia
= idef
->il
[F_GB12
].iatoms
[j
+1];
173 ib
= idef
->il
[F_GB12
].iatoms
[j
+2];
175 r
= 1.01*idef
->iparams
[m
].gb
.st
;
177 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
178 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
184 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
186 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
188 born
->vsolv_globalindex
[ia
] -= term
;
190 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
192 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
194 born
->vsolv_globalindex
[ib
] -= term
;
197 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
200 for (j
= 0; j
< natoms
; j
++)
202 if (born
->use_globalindex
[j
] == 1)
204 born
->gpol_globalindex
[j
] = -0.5*ONE_4PI_EPS0
/
205 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
210 for (j
= 0; j
< idef
->il
[F_GB12
].nr
; j
+= 3)
212 m
= idef
->il
[F_GB12
].iatoms
[j
];
213 ia
= idef
->il
[F_GB12
].iatoms
[j
+1];
214 ib
= idef
->il
[F_GB12
].iatoms
[j
+2];
216 r
= idef
->iparams
[m
].gb
.st
;
220 born
->gpol_globalindex
[ia
] = born
->gpol_globalindex
[ia
]+
221 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
222 born
->gpol_globalindex
[ib
] = born
->gpol_globalindex
[ib
]+
223 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
227 for (j
= 0; j
< idef
->il
[F_GB13
].nr
; j
+= 3)
229 m
= idef
->il
[F_GB13
].iatoms
[j
];
230 ia
= idef
->il
[F_GB13
].iatoms
[j
+1];
231 ib
= idef
->il
[F_GB13
].iatoms
[j
+2];
233 r
= idef
->iparams
[m
].gb
.st
;
236 born
->gpol_globalindex
[ia
] = born
->gpol_globalindex
[ia
]+
237 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
238 born
->gpol_globalindex
[ib
] = born
->gpol_globalindex
[ib
]+
239 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
248 /* Initialize all GB datastructs and compute polarization energies */
249 int init_gb(gmx_genborn_t
**p_born
,
250 t_forcerec
*fr
, const t_inputrec
*ir
,
251 const gmx_mtop_t
*mtop
, int gb_algorithm
)
253 int i
, j
, m
, ai
, aj
, jj
, natoms
, nalloc
;
254 real rai
, sk
, p
, doffset
;
258 gmx_localtop_t
*localtop
;
260 natoms
= mtop
->natoms
;
262 atoms
= gmx_mtop_global_atoms(mtop
);
263 localtop
= gmx_mtop_generate_local_top(mtop
, ir
);
270 snew(born
->drobc
, natoms
);
271 snew(born
->bRad
, natoms
);
273 /* Allocate memory for the global data arrays */
274 snew(born
->param_globalindex
, natoms
+3);
275 snew(born
->gpol_globalindex
, natoms
+3);
276 snew(born
->vsolv_globalindex
, natoms
+3);
277 snew(born
->gb_radius_globalindex
, natoms
+3);
278 snew(born
->use_globalindex
, natoms
+3);
280 snew(fr
->invsqrta
, natoms
);
281 snew(fr
->dvda
, natoms
);
284 fr
->dadx_rawptr
= NULL
;
286 born
->gpol_still_work
= NULL
;
287 born
->gpol_hct_work
= NULL
;
289 /* snew(born->asurf,natoms); */
290 /* snew(born->dasurf,natoms); */
292 /* Initialize the gb neighbourlist */
293 init_gb_nblist(natoms
, &(fr
->gblist
));
295 /* Do the Vsites exclusions (if any) */
296 for (i
= 0; i
< natoms
; i
++)
298 jj
= atoms
.atom
[i
].type
;
299 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
301 born
->use_globalindex
[i
] = 1;
305 born
->use_globalindex
[i
] = 0;
308 /* If we have a Vsite, put vs_globalindex[i]=0 */
309 if (C6 (fr
->nbfp
, fr
->ntype
, jj
, jj
) == 0 &&
310 C12(fr
->nbfp
, fr
->ntype
, jj
, jj
) == 0 &&
311 atoms
.atom
[i
].q
== 0)
313 born
->use_globalindex
[i
] = 0;
317 /* Copy algorithm parameters from inputrecord to local structure */
318 born
->obc_alpha
= ir
->gb_obc_alpha
;
319 born
->obc_beta
= ir
->gb_obc_beta
;
320 born
->obc_gamma
= ir
->gb_obc_gamma
;
321 born
->gb_doffset
= ir
->gb_dielectric_offset
;
322 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
323 born
->epsilon_r
= ir
->epsilon_r
;
325 doffset
= born
->gb_doffset
;
327 /* Set the surface tension */
328 born
->sa_surface_tension
= ir
->sa_surface_tension
;
330 /* If Still model, initialise the polarisation energies */
331 if (gb_algorithm
== egbSTILL
)
333 init_gb_still(&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
338 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
339 else if (gb_algorithm
== egbHCT
|| gb_algorithm
== egbOBC
)
342 snew(born
->gpol_hct_work
, natoms
+3);
344 for (i
= 0; i
< natoms
; i
++)
346 if (born
->use_globalindex
[i
] == 1)
348 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
349 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
350 born
->param_globalindex
[i
] = sk
;
351 born
->gb_radius_globalindex
[i
] = rai
;
355 born
->param_globalindex
[i
] = 0;
356 born
->gb_radius_globalindex
[i
] = 0;
361 /* Allocate memory for work arrays for temporary use */
362 snew(born
->work
, natoms
+4);
363 snew(born
->count
, natoms
);
364 snew(born
->nblist_work
, natoms
);
366 /* Domain decomposition specific stuff */
375 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
, gmx_localtop_t
*top
,
376 rvec x
[], t_nblist
*nl
,
377 gmx_genborn_t
*born
, t_mdatoms
*md
)
379 int i
, k
, n
, nj0
, nj1
, ai
, aj
, type
;
382 real gpi
, dr
, dr2
, dr4
, idr4
, rvdw
, ratio
, ccf
, theta
, term
, rai
, raj
;
383 real ix1
, iy1
, iz1
, jx1
, jy1
, jz1
, dx11
, dy11
, dz11
;
384 real rinv
, idr2
, idr6
, vaj
, dccf
, cosq
, sinq
, prod
, gpi2
;
386 real vai
, prod_ai
, icf4
, icf6
;
388 factor
= 0.5*ONE_4PI_EPS0
;
391 for (i
= 0; i
< born
->nr
; i
++)
393 born
->gpol_still_work
[i
] = 0;
396 for (i
= 0; i
< nl
->nri
; i
++)
401 nj1
= nl
->jindex
[i
+1];
403 /* Load shifts for this list */
404 shift
= nl
->shift
[i
];
405 shX
= fr
->shift_vec
[shift
][0];
406 shY
= fr
->shift_vec
[shift
][1];
407 shZ
= fr
->shift_vec
[shift
][2];
411 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
412 vai
= born
->vsolv
[ai
];
413 prod_ai
= STILL_P4
*vai
;
415 /* Load atom i coordinates, add shift vectors */
416 ix1
= shX
+ x
[ai
][0];
417 iy1
= shY
+ x
[ai
][1];
418 iz1
= shZ
+ x
[ai
][2];
420 for (k
= nj0
; k
< nj1
&& nl
->jjnr
[k
] >= 0; k
++)
431 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
432 rinv
= gmx_invsqrt(dr2
);
437 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
441 ratio
= dr2
/ (rvdw
* rvdw
);
442 vaj
= born
->vsolv
[aj
];
444 if (ratio
> STILL_P5INV
)
451 theta
= ratio
*STILL_PIP5
;
453 term
= 0.5*(1.0-cosq
);
455 sinq
= 1.0 - cosq
*cosq
;
456 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
461 icf6
= (4*ccf
-dccf
)*idr6
;
462 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
465 /* Save ai->aj and aj->ai chain rule terms */
466 fr
->dadx
[n
++] = prod
*icf6
;
467 fr
->dadx
[n
++] = prod_ai
*icf6
;
469 born
->gpol_still_work
[ai
] += gpi
;
472 /* Parallel summations */
473 if (DOMAINDECOMP(cr
))
475 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
478 /* Calculate the radii */
479 for (i
= 0; i
< fr
->natoms_force
; i
++) /* PELA born->nr */
481 if (born
->use
[i
] != 0)
483 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
485 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
486 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
490 /* Extra communication required for DD */
491 if (DOMAINDECOMP(cr
))
493 dd_atom_spread_real(cr
->dd
, born
->bRad
);
494 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
503 calc_gb_rad_hct(t_commrec
*cr
, t_forcerec
*fr
, gmx_localtop_t
*top
,
504 rvec x
[], t_nblist
*nl
,
505 gmx_genborn_t
*born
, t_mdatoms
*md
)
507 int i
, k
, n
, ai
, aj
, nj0
, nj1
, at0
, at1
;
510 real rai
, raj
, gpi
, dr2
, dr
, sk
, sk_ai
, sk2
, sk2_ai
, lij
, uij
, diff2
, tmp
, sum_ai
;
511 real rad
, min_rad
, rinv
, rai_inv
;
512 real ix1
, iy1
, iz1
, jx1
, jy1
, jz1
, dx11
, dy11
, dz11
;
513 real lij2
, uij2
, lij3
, uij3
, t1
, t2
, t3
;
514 real lij_inv
, dlij
, duij
, sk2_rinv
, prod
, log_term
;
515 real doffset
, raj_inv
, dadx_val
;
518 doffset
= born
->gb_doffset
;
519 gb_radius
= born
->gb_radius
;
521 for (i
= 0; i
< born
->nr
; i
++)
523 born
->gpol_hct_work
[i
] = 0;
526 /* Keep the compiler happy */
530 for (i
= 0; i
< nl
->nri
; i
++)
535 nj1
= nl
->jindex
[i
+1];
537 /* Load shifts for this list */
538 shift
= nl
->shift
[i
];
539 shX
= fr
->shift_vec
[shift
][0];
540 shY
= fr
->shift_vec
[shift
][1];
541 shZ
= fr
->shift_vec
[shift
][2];
546 sk_ai
= born
->param
[ai
];
547 sk2_ai
= sk_ai
*sk_ai
;
549 /* Load atom i coordinates, add shift vectors */
550 ix1
= shX
+ x
[ai
][0];
551 iy1
= shY
+ x
[ai
][1];
552 iz1
= shZ
+ x
[ai
][2];
556 for (k
= nj0
; k
< nj1
&& nl
->jjnr
[k
] >= 0; k
++)
568 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
569 rinv
= gmx_invsqrt(dr2
);
572 sk
= born
->param
[aj
];
575 /* aj -> ai interaction */
596 lij_inv
= gmx_invsqrt(lij2
);
599 prod
= 0.25*sk2_rinv
;
601 log_term
= log(uij
*lij_inv
);
603 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
608 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
611 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
612 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
613 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
615 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
616 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
617 /* rb2 is moved to chainrule */
625 fr
->dadx
[n
++] = dadx_val
;
628 /* ai -> aj interaction */
629 if (raj
< dr
+ sk_ai
)
631 lij
= 1.0/(dr
-sk_ai
);
644 uij
= 1.0/(dr
+sk_ai
);
650 lij_inv
= gmx_invsqrt(lij2
);
651 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
653 prod
= 0.25 * sk2_rinv
;
655 /* log_term = table_log(uij*lij_inv,born->log_table,
656 LOG_TABLE_ACCURACY); */
657 log_term
= log(uij
*lij_inv
);
659 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
664 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
668 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
669 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
670 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
672 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
673 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
675 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
681 fr
->dadx
[n
++] = dadx_val
;
684 born
->gpol_hct_work
[ai
] += sum_ai
;
687 /* Parallel summations */
688 if (DOMAINDECOMP(cr
))
690 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
693 for (i
= 0; i
< fr
->natoms_force
; i
++) /* PELA born->nr */
695 if (born
->use
[i
] != 0)
697 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
698 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
699 min_rad
= rai
+ doffset
;
702 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
703 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
707 /* Extra communication required for DD */
708 if (DOMAINDECOMP(cr
))
710 dd_atom_spread_real(cr
->dd
, born
->bRad
);
711 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
719 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, gmx_localtop_t
*top
,
720 rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
, t_mdatoms
*md
)
722 int i
, k
, ai
, aj
, nj0
, nj1
, n
, at0
, at1
;
725 real rai
, raj
, gpi
, dr2
, dr
, sk
, sk2
, lij
, uij
, diff2
, tmp
, sum_ai
;
726 real rad
, min_rad
, sum_ai2
, sum_ai3
, tsum
, tchain
, rinv
, rai_inv
, lij_inv
, rai_inv2
;
727 real log_term
, prod
, sk2_rinv
, sk_ai
, sk2_ai
;
728 real ix1
, iy1
, iz1
, jx1
, jy1
, jz1
, dx11
, dy11
, dz11
;
729 real lij2
, uij2
, lij3
, uij3
, dlij
, duij
, t1
, t2
, t3
;
730 real doffset
, raj_inv
, dadx_val
;
733 /* Keep the compiler happy */
738 doffset
= born
->gb_doffset
;
739 gb_radius
= born
->gb_radius
;
741 for (i
= 0; i
< born
->nr
; i
++)
743 born
->gpol_hct_work
[i
] = 0;
746 for (i
= 0; i
< nl
->nri
; i
++)
751 nj1
= nl
->jindex
[i
+1];
753 /* Load shifts for this list */
754 shift
= nl
->shift
[i
];
755 shX
= fr
->shift_vec
[shift
][0];
756 shY
= fr
->shift_vec
[shift
][1];
757 shZ
= fr
->shift_vec
[shift
][2];
762 sk_ai
= born
->param
[ai
];
763 sk2_ai
= sk_ai
*sk_ai
;
765 /* Load atom i coordinates, add shift vectors */
766 ix1
= shX
+ x
[ai
][0];
767 iy1
= shY
+ x
[ai
][1];
768 iz1
= shZ
+ x
[ai
][2];
772 for (k
= nj0
; k
< nj1
&& nl
->jjnr
[k
] >= 0; k
++)
784 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
785 rinv
= gmx_invsqrt(dr2
);
788 /* sk is precalculated in init_gb() */
789 sk
= born
->param
[aj
];
792 /* aj -> ai interaction */
812 lij_inv
= gmx_invsqrt(lij2
);
815 prod
= 0.25*sk2_rinv
;
817 log_term
= log(uij
*lij_inv
);
819 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
823 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
827 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
828 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
829 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
831 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
839 fr
->dadx
[n
++] = dadx_val
;
841 /* ai -> aj interaction */
842 if (raj
< dr
+ sk_ai
)
844 lij
= 1.0/(dr
-sk_ai
);
857 uij
= 1.0/(dr
+sk_ai
);
863 lij_inv
= gmx_invsqrt(lij2
);
864 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
866 prod
= 0.25 * sk2_rinv
;
868 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
869 log_term
= log(uij
*lij_inv
);
871 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
875 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
878 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
879 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
880 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
882 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
884 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
891 fr
->dadx
[n
++] = dadx_val
;
894 born
->gpol_hct_work
[ai
] += sum_ai
;
898 /* Parallel summations */
899 if (DOMAINDECOMP(cr
))
901 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
904 for (i
= 0; i
< fr
->natoms_force
; i
++) /* PELA born->nr */
906 if (born
->use
[i
] != 0)
908 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
912 sum_ai
= rai
* born
->gpol_hct_work
[i
];
913 sum_ai2
= sum_ai
* sum_ai
;
914 sum_ai3
= sum_ai2
* sum_ai
;
916 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
917 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
918 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
920 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
922 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
923 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
927 /* Extra (local) communication required for DD */
928 if (DOMAINDECOMP(cr
))
930 dd_atom_spread_real(cr
->dd
, born
->bRad
);
931 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
932 dd_atom_spread_real(cr
->dd
, born
->drobc
);
941 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
, gmx_localtop_t
*top
,
942 rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
, t_mdatoms
*md
, t_nrnb
*nrnb
)
948 if (fr
->bAllvsAll
&& fr
->dadx
== NULL
)
950 /* We might need up to 8 atoms of padding before and after,
951 * and another 4 units to guarantee SSE alignment.
953 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
954 snew(fr
->dadx_rawptr
, fr
->nalloc_dadx
);
955 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
959 /* In the SSE-enabled gb-loops, when writing to dadx, we
960 * always write 2*4 elements at a time, even in the case with only
961 * 1-3 j particles, where we only really need to write 2*(1-3)
962 * elements. This is because we want dadx to be aligned to a 16-
963 * byte boundary, and being able to use _mm_store/load_ps
965 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
967 /* First, reallocate the dadx array, we need 3 extra for SSE */
968 if (ndadx
+ 3 > fr
->nalloc_dadx
)
970 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
971 srenew(fr
->dadx_rawptr
, fr
->nalloc_dadx
);
972 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
978 cnt
= md
->homenr
*(md
->nr
/2+1);
980 if (ir
->gb_algorithm
== egbSTILL
)
982 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
983 if (fr
->use_simd_kernels
)
986 genborn_allvsall_calc_still_radii_sse2_double(fr
, md
, born
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
988 genborn_allvsall_calc_still_radii_sse2_single(fr
, md
, born
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
993 genborn_allvsall_calc_still_radii(fr
, md
, born
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
996 genborn_allvsall_calc_still_radii(fr
, md
, born
, top
, x
[0], &fr
->AllvsAll_workgb
);
998 /* 13 flops in outer loop, 47 flops in inner loop */
999 inc_nrnb(nrnb
, eNR_BORN_AVA_RADII_STILL
, md
->homenr
*13+cnt
*47);
1001 else if (ir
->gb_algorithm
== egbHCT
|| ir
->gb_algorithm
== egbOBC
)
1003 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1004 if (fr
->use_simd_kernels
)
1007 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr
, md
, born
, ir
->gb_algorithm
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
1009 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
, md
, born
, ir
->gb_algorithm
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
1014 genborn_allvsall_calc_hct_obc_radii(fr
, md
, born
, ir
->gb_algorithm
, top
, x
[0], cr
, &fr
->AllvsAll_workgb
);
1017 genborn_allvsall_calc_hct_obc_radii(fr
, md
, born
, ir
->gb_algorithm
, top
, x
[0], &fr
->AllvsAll_workgb
);
1019 /* 24 flops in outer loop, 183 in inner */
1020 inc_nrnb(nrnb
, eNR_BORN_AVA_RADII_HCT_OBC
, md
->homenr
*24+cnt
*183);
1024 gmx_fatal(FARGS
, "Bad gb algorithm for all-vs-all interactions");
1029 /* Switch for determining which algorithm to use for Born radii calculation */
1032 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1033 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1034 switch (ir
->gb_algorithm
)
1037 if (fr
->use_simd_kernels
)
1039 calc_gb_rad_still_sse2_double(cr
, fr
, born
->nr
, top
, atype
, x
[0], nl
, born
);
1043 calc_gb_rad_still(cr
, fr
, top
, x
, nl
, born
, md
);
1047 if (fr
->use_simd_kernels
)
1049 calc_gb_rad_hct_obc_sse2_double(cr
, fr
, born
->nr
, top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1053 calc_gb_rad_hct(cr
, fr
, top
, x
, nl
, born
, md
);
1057 if (fr
->use_simd_kernels
)
1059 calc_gb_rad_hct_obc_sse2_double(cr
, fr
, born
->nr
, top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1063 calc_gb_rad_obc(cr
, fr
, born
->nr
, top
, x
, nl
, born
, md
);
1068 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir
->gb_algorithm
);
1071 switch (ir
->gb_algorithm
)
1074 calc_gb_rad_still(cr
, fr
, top
, x
, nl
, born
, md
);
1077 calc_gb_rad_hct(cr
, fr
, top
, x
, nl
, born
, md
);
1080 calc_gb_rad_obc(cr
, fr
, top
, x
, nl
, born
, md
);
1084 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d", ir
->gb_algorithm
);
1091 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1092 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1093 switch (ir
->gb_algorithm
)
1096 if (fr
->use_simd_kernels
)
1098 calc_gb_rad_still_sse2_single(cr
, fr
, born
->nr
, top
, x
[0], nl
, born
);
1102 calc_gb_rad_still(cr
, fr
, top
, x
, nl
, born
, md
);
1106 if (fr
->use_simd_kernels
)
1108 calc_gb_rad_hct_obc_sse2_single(cr
, fr
, born
->nr
, top
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1112 calc_gb_rad_hct(cr
, fr
, top
, x
, nl
, born
, md
);
1117 if (fr
->use_simd_kernels
)
1119 calc_gb_rad_hct_obc_sse2_single(cr
, fr
, born
->nr
, top
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1123 calc_gb_rad_obc(cr
, fr
, born
->nr
, top
, x
, nl
, born
, md
);
1128 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir
->gb_algorithm
);
1132 switch (ir
->gb_algorithm
)
1135 calc_gb_rad_still(cr
, fr
, top
, x
, nl
, born
, md
);
1138 calc_gb_rad_hct(cr
, fr
, top
, x
, nl
, born
, md
);
1141 calc_gb_rad_obc(cr
, fr
, top
, x
, nl
, born
, md
);
1145 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d", ir
->gb_algorithm
);
1148 #endif /* Single precision sse */
1150 #endif /* Double or single precision */
1152 if (fr
->bAllvsAll
== FALSE
)
1154 switch (ir
->gb_algorithm
)
1157 /* 17 flops per outer loop iteration, 47 flops per inner loop */
1158 inc_nrnb(nrnb
, eNR_BORN_RADII_STILL
, nl
->nri
*17+nl
->nrj
*47);
1162 /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
1163 inc_nrnb(nrnb
, eNR_BORN_RADII_HCT_OBC
, nl
->nri
*61+nl
->nrj
*183);
1176 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1177 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
, real epsilon_r
,
1178 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1180 int i
, j
, n0
, m
, nnn
, type
, ai
, aj
;
1186 real isaprod
, qq
, gbscale
, gbtabscale
, Y
, F
, Geps
, Heps2
, Fp
, VV
, FF
, rt
, eps
, eps2
;
1187 real vgb
, fgb
, vcoul
, fijC
, dvdatmp
, fscal
, dvdaj
;
1193 t_iatom
*forceatoms
;
1195 /* Scale the electrostatics by gb_epsilon_solvent */
1196 facel
= facel
* ((1.0/epsilon_r
) - 1.0/gb_epsilon_solvent
);
1198 gbtabscale
= *p_gbtabscale
;
1201 for (j
= F_GB12
; j
<= F_GB14
; j
++)
1203 forceatoms
= idef
->il
[j
].iatoms
;
1205 for (i
= 0; i
< idef
->il
[j
].nr
; )
1207 /* To avoid reading in the interaction type, we just increment i to pass over
1208 * the types in the forceatoms array, this saves some memory accesses
1211 ai
= forceatoms
[i
++];
1212 aj
= forceatoms
[i
++];
1214 ki
= pbc_rvec_sub(pbc
, x
[ai
], x
[aj
], dx
);
1215 rsq11
= iprod(dx
, dx
);
1217 isai
= invsqrta
[ai
];
1218 iq
= (-1)*facel
*charge
[ai
];
1220 rinv11
= gmx_invsqrt(rsq11
);
1221 isaj
= invsqrta
[aj
];
1222 isaprod
= isai
*isaj
;
1223 qq
= isaprod
*iq
*charge
[aj
];
1224 gbscale
= isaprod
*gbtabscale
;
1233 Geps
= eps
*GBtab
[nnn
+2];
1234 Heps2
= eps2
*GBtab
[nnn
+3];
1237 FF
= Fp
+Geps
+2.0*Heps2
;
1239 fijC
= qq
*FF
*gbscale
;
1240 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1241 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1242 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1243 vctot
= vctot
+ vgb
;
1244 fgb
= -(fijC
)*rinv11
;
1248 ivec_sub(SHIFT_IVEC(graph
, ai
), SHIFT_IVEC(graph
, aj
), dt
);
1252 for (m
= 0; (m
< DIM
); m
++) /* 15 */
1257 fshift
[ki
][m
] += fscal
;
1258 fshift
[CENTRAL
][m
] -= fscal
;
1266 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1267 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, double facel
)
1269 int i
, ai
, at0
, at1
;
1270 real rai
, e
, derb
, q
, q2
, fi
, rai_inv
, vtot
;
1272 if (DOMAINDECOMP(cr
))
1275 at1
= cr
->dd
->nat_home
;
1284 /* Scale the electrostatics by gb_epsilon_solvent */
1285 facel
= facel
* ((1.0/born
->epsilon_r
) - 1.0/born
->gb_epsilon_solvent
);
1289 /* Apply self corrections */
1290 for (i
= at0
; i
< at1
; i
++)
1294 if (born
->use
[ai
] == 1)
1296 rai
= born
->bRad
[ai
];
1302 derb
= 0.5*e
*rai_inv
*rai_inv
;
1303 dvda
[ai
] += derb
*rai
;
1312 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1313 real
*dvda
, t_mdatoms
*md
)
1315 int ai
, i
, at0
, at1
;
1316 real e
, es
, rai
, rbi
, term
, probe
, tmp
, factor
;
1317 real rbi_inv
, rbi_inv2
;
1319 /* To keep the compiler happy */
1322 if (DOMAINDECOMP(cr
))
1325 at1
= cr
->dd
->nat_home
;
1333 /* factor is the surface tension */
1334 factor
= born
->sa_surface_tension
;
1337 // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1338 if(gb_algorithm==egbSTILL)
1340 factor=0.0049*100*CAL2JOULE;
1344 factor=0.0054*100*CAL2JOULE;
1347 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1353 for (i
= at0
; i
< at1
; i
++)
1357 if (born
->use
[ai
] == 1)
1359 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1360 rbi_inv
= fr
->invsqrta
[ai
];
1361 rbi_inv2
= rbi_inv
* rbi_inv
;
1362 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1364 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1365 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1375 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1376 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
)
1378 int i
, k
, n
, ai
, aj
, nj0
, nj1
, n0
, n1
;
1381 real fgb
, fij
, rb2
, rbi
, fix1
, fiy1
, fiz1
;
1382 real ix1
, iy1
, iz1
, jx1
, jy1
, jz1
, dx11
, dy11
, dz11
, rsq11
;
1383 real rinv11
, tx
, ty
, tz
, rbai
, rbaj
, fgb_ai
;
1393 if (gb_algorithm
== egbSTILL
)
1395 for (i
= n0
; i
< n1
; i
++)
1397 rbi
= born
->bRad
[i
];
1398 rb
[i
] = (2 * rbi
* rbi
* dvda
[i
])/ONE_4PI_EPS0
;
1401 else if (gb_algorithm
== egbHCT
)
1403 for (i
= n0
; i
< n1
; i
++)
1405 rbi
= born
->bRad
[i
];
1406 rb
[i
] = rbi
* rbi
* dvda
[i
];
1409 else if (gb_algorithm
== egbOBC
)
1411 for (i
= n0
; i
< n1
; i
++)
1413 rbi
= born
->bRad
[i
];
1414 rb
[i
] = rbi
* rbi
* born
->drobc
[i
] * dvda
[i
];
1418 for (i
= 0; i
< nl
->nri
; i
++)
1422 nj0
= nl
->jindex
[i
];
1423 nj1
= nl
->jindex
[i
+1];
1425 /* Load shifts for this list */
1426 shift
= nl
->shift
[i
];
1427 shX
= shift_vec
[shift
][0];
1428 shY
= shift_vec
[shift
][1];
1429 shZ
= shift_vec
[shift
][2];
1431 /* Load atom i coordinates, add shift vectors */
1432 ix1
= shX
+ x
[ai
][0];
1433 iy1
= shY
+ x
[ai
][1];
1434 iz1
= shZ
+ x
[ai
][2];
1442 for (k
= nj0
; k
< nj1
&& nl
->jjnr
[k
] >= 0; k
++)
1456 fgb
= rbai
*dadx
[n
++];
1457 fgb_ai
= rbaj
*dadx
[n
++];
1459 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1470 /* Update force on atom aj */
1471 t
[aj
][0] = t
[aj
][0] - tx
;
1472 t
[aj
][1] = t
[aj
][1] - ty
;
1473 t
[aj
][2] = t
[aj
][2] - tz
;
1476 /* Update force and shift forces on atom ai */
1477 t
[ai
][0] = t
[ai
][0] + fix1
;
1478 t
[ai
][1] = t
[ai
][1] + fiy1
;
1479 t
[ai
][2] = t
[ai
][2] + fiz1
;
1481 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1482 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1483 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1492 calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1493 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, int sa_algorithm
, t_nrnb
*nrnb
,
1494 const t_pbc
*pbc
, const t_graph
*graph
, gmx_enerdata_t
*enerd
)
1501 const t_pbc
*pbc_null
;
1512 if (sa_algorithm
== esaAPPROX
)
1514 /* Do a simple ACE type approximation for the non-polar solvation */
1515 enerd
->term
[F_NPSOLVATION
] += calc_gb_nonpolar(cr
, fr
, born
->nr
, born
, top
, fr
->dvda
, md
);
1518 /* Calculate the bonded GB-interactions using either table or analytical formula */
1519 enerd
->term
[F_GBPOL
] += gb_bonds_tab(x
, f
, fr
->fshift
, md
->chargeA
, &(fr
->gbtabscale
),
1520 fr
->invsqrta
, fr
->dvda
, fr
->gbtab
.data
, idef
, born
->epsilon_r
, born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1522 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1523 enerd
->term
[F_GBPOL
] += calc_gb_selfcorrections(cr
, born
->nr
, md
->chargeA
, born
, fr
->dvda
, fr
->epsfac
);
1525 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1526 if (DOMAINDECOMP(cr
))
1528 dd_atom_sum_real(cr
->dd
, fr
->dvda
);
1529 dd_atom_spread_real(cr
->dd
, fr
->dvda
);
1534 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1535 if (fr
->use_simd_kernels
)
1538 genborn_allvsall_calc_chainrule_sse2_double(fr
, md
, born
, x
[0], f
[0], gb_algorithm
, fr
->AllvsAll_workgb
);
1540 genborn_allvsall_calc_chainrule_sse2_single(fr
, md
, born
, x
[0], f
[0], gb_algorithm
, fr
->AllvsAll_workgb
);
1545 genborn_allvsall_calc_chainrule(fr
, md
, born
, x
[0], f
[0], gb_algorithm
, fr
->AllvsAll_workgb
);
1548 genborn_allvsall_calc_chainrule(fr
, md
, born
, x
[0], f
[0], gb_algorithm
, fr
->AllvsAll_workgb
);
1550 cnt
= md
->homenr
*(md
->nr
/2+1);
1551 /* 9 flops for outer loop, 15 for inner */
1552 inc_nrnb(nrnb
, eNR_BORN_AVA_CHAINRULE
, md
->homenr
*9+cnt
*15);
1556 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1557 if (fr
->use_simd_kernels
)
1560 calc_gb_chainrule_sse2_double(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
, x
[0],
1561 f
[0], fr
->fshift
[0], fr
->shift_vec
[0], gb_algorithm
, born
, md
);
1563 calc_gb_chainrule_sse2_single(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
, x
[0],
1564 f
[0], fr
->fshift
[0], fr
->shift_vec
[0], gb_algorithm
, born
, md
);
1569 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1570 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
, md
);
1573 calc_gb_chainrule(fr
->natoms_force
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1574 x
, f
, fr
->fshift
, fr
->shift_vec
, gb_algorithm
, born
);
1579 /* 9 flops for outer loop, 15 for inner */
1580 inc_nrnb(nrnb
, eNR_BORN_CHAINRULE
, fr
->gblist
.nri
*9+fr
->gblist
.nrj
*15);
1584 static void add_j_to_gblist(gbtmpnbl_t
*list
, int aj
)
1586 if (list
->naj
>= list
->aj_nalloc
)
1588 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1589 srenew(list
->aj
, list
->aj_nalloc
);
1592 list
->aj
[list
->naj
++] = aj
;
1595 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
, int shift
)
1599 /* Search the list with the same shift, if there is one */
1601 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1605 if (ind
== lists
->nlist
)
1607 if (lists
->nlist
== lists
->list_nalloc
)
1609 lists
->list_nalloc
++;
1610 srenew(lists
->list
, lists
->list_nalloc
);
1611 for (i
= lists
->nlist
; i
< lists
->list_nalloc
; i
++)
1613 lists
->list
[i
].aj
= NULL
;
1614 lists
->list
[i
].aj_nalloc
= 0;
1619 lists
->list
[lists
->nlist
].shift
= shift
;
1620 lists
->list
[lists
->nlist
].naj
= 0;
1624 return &lists
->list
[ind
];
1627 static void add_bondeds_to_gblist(t_ilist
*il
,
1628 gmx_bool bMolPBC
, t_pbc
*pbc
, t_graph
*g
, rvec
*x
,
1629 struct gbtmpnbls
*nls
)
1631 int ind
, j
, ai
, aj
, shift
, found
;
1637 for (ind
= 0; ind
< il
->nr
; ind
+= 3)
1639 ai
= il
->iatoms
[ind
+1];
1640 aj
= il
->iatoms
[ind
+2];
1645 rvec_sub(x
[ai
], x
[aj
], dx
);
1646 ivec_sub(SHIFT_IVEC(g
, ai
), SHIFT_IVEC(g
, aj
), dt
);
1647 shift
= IVEC2IS(dt
);
1651 shift
= pbc_dx_aiuc(pbc
, x
[ai
], x
[aj
], dx
);
1654 /* Find the list for this shift or create one */
1655 list
= find_gbtmplist(&nls
[ai
], shift
);
1659 /* So that we do not add the same bond twice.
1660 * This happens with some constraints between 1-3 atoms
1661 * that are in the bond-list but should not be in the GB nb-list */
1662 for (j
= 0; j
< list
->naj
; j
++)
1664 if (list
->aj
[j
] == aj
)
1674 gmx_incons("ai == aj");
1677 add_j_to_gblist(list
, aj
);
1683 compare_int (const void * a
, const void * b
)
1685 return ( *(int*)a
- *(int*)b
);
1690 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
,
1691 rvec x
[], matrix box
,
1692 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1694 int i
, l
, ii
, j
, k
, n
, nj0
, nj1
, ai
, aj
, at0
, at1
, found
, shift
, s
;
1699 struct gbtmpnbls
*nls
;
1700 gbtmpnbl_t
*list
= NULL
;
1702 set_pbc(&pbc
, fr
->ePBC
, box
);
1703 nls
= born
->nblist_work
;
1705 for (i
= 0; i
< born
->nr
; i
++)
1712 set_pbc_dd(&pbc
, fr
->ePBC
, cr
->dd
, TRUE
, box
);
1715 switch (gb_algorithm
)
1719 /* Loop over 1-2, 1-3 and 1-4 interactions */
1720 for (j
= F_GB12
; j
<= F_GB14
; j
++)
1722 add_bondeds_to_gblist(&idef
->il
[j
], fr
->bMolPBC
, &pbc
, graph
, x
, nls
);
1726 /* Loop over 1-4 interactions */
1727 add_bondeds_to_gblist(&idef
->il
[F_GB14
], fr
->bMolPBC
, &pbc
, graph
, x
, nls
);
1730 gmx_incons("Unknown GB algorithm");
1733 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1734 for (n
= 0; (n
< fr
->nnblists
); n
++)
1736 for (i
= 0; (i
< eNL_NR
); i
++)
1738 nblist
= &(fr
->nblists
[n
].nlist_sr
[i
]);
1740 if (nblist
->nri
> 0 && (i
== eNL_VDWQQ
|| i
== eNL_QQ
))
1742 for (j
= 0; j
< nblist
->nri
; j
++)
1744 ai
= nblist
->iinr
[j
];
1745 shift
= nblist
->shift
[j
];
1747 /* Find the list for this shift or create one */
1748 list
= find_gbtmplist(&nls
[ai
], shift
);
1750 nj0
= nblist
->jindex
[j
];
1751 nj1
= nblist
->jindex
[j
+1];
1753 /* Add all the j-atoms in the non-bonded list to the GB list */
1754 for (k
= nj0
; k
< nj1
; k
++)
1756 add_j_to_gblist(list
, nblist
->jjnr
[k
]);
1763 /* Zero out some counters */
1767 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1769 for (i
= 0; i
< fr
->natoms_force
; i
++)
1771 for (s
= 0; s
< nls
[i
].nlist
; s
++)
1773 list
= &nls
[i
].list
[s
];
1775 /* Only add those atoms that actually have neighbours */
1776 if (born
->use
[i
] != 0)
1778 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1779 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1782 for (k
= 0; k
< list
->naj
; k
++)
1784 /* Memory allocation for jjnr */
1785 if (fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1787 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1791 fprintf(debug
, "Increasing GB neighbourlist j size to %d\n", fr
->gblist
.maxnrj
);
1794 srenew(fr
->gblist
.jjnr
, fr
->gblist
.maxnrj
);
1798 if (i
== list
->aj
[k
])
1800 gmx_incons("i == list->aj[k]");
1802 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1805 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
1812 for (i
= 0; i
< fr
->gblist
.nri
; i
++)
1814 nj0
= fr
->gblist
.jindex
[i
];
1815 nj1
= fr
->gblist
.jindex
[i
+1];
1816 ai
= fr
->gblist
.iinr
[i
];
1819 for (j
= nj0
; j
< nj1
; j
++)
1821 if (fr
->gblist
.jjnr
[j
] < ai
)
1823 fr
->gblist
.jjnr
[j
] += fr
->natoms_force
;
1826 qsort(fr
->gblist
.jjnr
+nj0
, nj1
-nj0
, sizeof(int), compare_int
);
1828 for (j
= nj0
; j
< nj1
; j
++)
1830 if (fr
->gblist
.jjnr
[j
] >= fr
->natoms_force
)
1832 fr
->gblist
.jjnr
[j
] -= fr
->natoms_force
;
1842 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
1845 gmx_domdec_t
*dd
= NULL
;
1847 if (DOMAINDECOMP(cr
))
1855 /* Single node, just copy pointers and return */
1856 if (gb_algorithm
== egbSTILL
)
1858 born
->gpol
= born
->gpol_globalindex
;
1859 born
->vsolv
= born
->vsolv_globalindex
;
1860 born
->gb_radius
= born
->gb_radius_globalindex
;
1864 born
->param
= born
->param_globalindex
;
1865 born
->gb_radius
= born
->gb_radius_globalindex
;
1868 born
->use
= born
->use_globalindex
;
1873 /* Reallocation of local arrays if necessary */
1874 /* fr->natoms_force is equal to dd->nat_tot */
1875 if (DOMAINDECOMP(cr
) && dd
->nat_tot
> born
->nalloc
)
1879 nalloc
= dd
->nat_tot
;
1881 /* Arrays specific to different gb algorithms */
1882 if (gb_algorithm
== egbSTILL
)
1884 srenew(born
->gpol
, nalloc
+3);
1885 srenew(born
->vsolv
, nalloc
+3);
1886 srenew(born
->gb_radius
, nalloc
+3);
1887 for (i
= born
->nalloc
; (i
< nalloc
+3); i
++)
1891 born
->gb_radius
[i
] = 0;
1896 srenew(born
->param
, nalloc
+3);
1897 srenew(born
->gb_radius
, nalloc
+3);
1898 for (i
= born
->nalloc
; (i
< nalloc
+3); i
++)
1901 born
->gb_radius
[i
] = 0;
1905 /* All gb-algorithms use the array for vsites exclusions */
1906 srenew(born
->use
, nalloc
+3);
1907 for (i
= born
->nalloc
; (i
< nalloc
+3); i
++)
1912 born
->nalloc
= nalloc
;
1915 /* With dd, copy algorithm specific arrays */
1916 if (gb_algorithm
== egbSTILL
)
1918 for (i
= at0
; i
< at1
; i
++)
1920 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
1921 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
1922 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
1923 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
1928 for (i
= at0
; i
< at1
; i
++)
1930 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
1931 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
1932 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];