2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010,2014,2015,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "genborn_allvsall.h"
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/math/functions.h"
47 #include "gromacs/math/units.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/mdlib/genborn.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/topology/topology.h"
54 #include "gromacs/utility/smalloc.h"
60 int ** exclusion_mask_gb
;
62 gmx_allvsallgb2_data_t
;
65 calc_maxoffset(int i
, int natoms
)
69 if ((natoms
% 2) == 1)
71 /* Odd number of atoms, easy */
74 else if ((natoms
% 4) == 0)
76 /* Multiple of four is hard */
85 maxoffset
= natoms
/2-1;
96 maxoffset
= natoms
/2-1;
105 maxoffset
= natoms
/2;
109 maxoffset
= natoms
/2-1;
117 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t
* aadata
,
129 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
130 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
131 * whether they should interact or not.
133 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
134 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
135 * we create a jindex array with three elements per i atom: the starting point, the point to
136 * which we need to check exclusions, and the end point.
137 * This way we only have to allocate a short exclusion mask per i atom.
140 /* Allocate memory for jindex arrays */
141 snew(aadata
->jindex_gb
, 3*natoms
);
143 /* Pointer to lists with exclusion masks */
144 snew(aadata
->exclusion_mask_gb
, natoms
);
146 for (i
= 0; i
< natoms
; i
++)
149 aadata
->jindex_gb
[3*i
] = i
+1;
150 max_offset
= calc_maxoffset(i
, natoms
);
152 /* first check the max range of atoms to EXCLUDE */
156 for (j
= 0; j
< ilist
[F_GB12
].nr
; j
+= 3)
158 a1
= ilist
[F_GB12
].iatoms
[j
+1];
159 a2
= ilist
[F_GB12
].iatoms
[j
+2];
173 if (k
> 0 && k
<= max_offset
)
175 max_excl_offset
= std::max(k
, max_excl_offset
);
181 for (j
= 0; j
< ilist
[F_GB13
].nr
; j
+= 3)
183 a1
= ilist
[F_GB13
].iatoms
[j
+1];
184 a2
= ilist
[F_GB13
].iatoms
[j
+2];
199 if (k
> 0 && k
<= max_offset
)
201 max_excl_offset
= std::max(k
, max_excl_offset
);
207 for (j
= 0; j
< ilist
[F_GB14
].nr
; j
+= 3)
209 a1
= ilist
[F_GB14
].iatoms
[j
+1];
210 a2
= ilist
[F_GB14
].iatoms
[j
+2];
225 if (k
> 0 && k
<= max_offset
)
227 max_excl_offset
= std::max(k
, max_excl_offset
);
231 max_excl_offset
= std::min(max_offset
, max_excl_offset
);
233 aadata
->jindex_gb
[3*i
+1] = i
+1+max_excl_offset
;
235 snew(aadata
->exclusion_mask_gb
[i
], max_excl_offset
);
237 /* Include everything by default */
238 for (j
= 0; j
< max_excl_offset
; j
++)
240 /* Use all-ones to mark interactions that should be present, compatible with SSE */
241 aadata
->exclusion_mask_gb
[i
][j
] = 0xFFFFFFFF;
243 /* Go through exclusions again */
246 for (j
= 0; j
< ilist
[F_GB12
].nr
; j
+= 3)
248 a1
= ilist
[F_GB12
].iatoms
[j
+1];
249 a2
= ilist
[F_GB12
].iatoms
[j
+2];
263 if (k
> 0 && k
<= max_offset
)
265 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
271 for (j
= 0; j
< ilist
[F_GB13
].nr
; j
+= 3)
273 a1
= ilist
[F_GB13
].iatoms
[j
+1];
274 a2
= ilist
[F_GB13
].iatoms
[j
+2];
288 if (k
> 0 && k
<= max_offset
)
290 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
296 for (j
= 0; j
< ilist
[F_GB14
].nr
; j
+= 3)
298 a1
= ilist
[F_GB14
].iatoms
[j
+1];
299 a2
= ilist
[F_GB14
].iatoms
[j
+2];
313 if (k
> 0 && k
<= max_offset
)
315 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
323 aadata
->jindex_gb
[3*i
+2] = i
+1+max_offset
;
329 genborn_allvsall_setup(gmx_allvsallgb2_data_t
** p_aadata
,
336 gmx_allvsallgb2_data_t
*aadata
;
341 setup_gb_exclusions_and_indices(aadata
, ilist
, natoms
, bInclude12
, bInclude13
, bInclude14
);
347 genborn_allvsall_calc_still_radii(t_forcerec
* fr
,
349 gmx_genborn_t
* born
,
350 gmx_localtop_t
* top
,
354 gmx_allvsallgb2_data_t
*aadata
;
367 real irsq
, idr4
, idr6
;
368 real raj
, rvdw
, ratio
;
369 real vaj
, ccf
, dccf
, theta
, cosq
;
370 real term
, prod
, icf4
, icf6
, gpi2
, factor
, sinq
;
372 natoms
= mdatoms
->nr
;
374 ni1
= mdatoms
->homenr
;
375 factor
= 0.5*ONE_4PI_EPS0
;
378 aadata
= *((gmx_allvsallgb2_data_t
**)work
);
380 if (aadata
== nullptr)
382 genborn_allvsall_setup(&aadata
, top
->idef
.il
, mdatoms
->nr
,
384 *((gmx_allvsallgb2_data_t
**)work
) = aadata
;
388 for (i
= 0; i
< born
->nr
; i
++)
390 born
->gpol_still_work
[i
] = 0;
394 for (i
= ni0
; i
< ni1
; i
++)
396 /* We assume shifts are NOT used for all-vs-all interactions */
398 /* Load i atom data */
405 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]];
406 vai
= born
->vsolv
[i
];
407 prod_ai
= STILL_P4
*vai
;
409 /* Load limits for loop over neighbors */
410 nj0
= aadata
->jindex_gb
[3*i
];
411 nj1
= aadata
->jindex_gb
[3*i
+1];
412 nj2
= aadata
->jindex_gb
[3*i
+2];
414 mask
= aadata
->exclusion_mask_gb
[i
];
416 /* Prologue part, including exclusion mask */
417 for (j
= nj0
; j
< nj1
; j
++, mask
++)
423 /* load j atom coordinates */
428 /* Calculate distance */
432 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
434 /* Calculate 1/r and 1/r2 */
435 rinv
= gmx::invsqrt(rsq
);
440 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]];
444 ratio
= rsq
/ (rvdw
* rvdw
);
445 vaj
= born
->vsolv
[k
];
448 if (ratio
> STILL_P5INV
)
455 theta
= ratio
*STILL_PIP5
;
457 term
= 0.5*(1.0-cosq
);
459 sinq
= 1.0 - cosq
*cosq
;
460 dccf
= 2.0*term
*sinq
*gmx::invsqrt(sinq
)*theta
;
465 icf6
= (4*ccf
-dccf
)*idr6
;
467 born
->gpol_still_work
[k
] += prod_ai
*icf4
;
470 /* Save ai->aj and aj->ai chain rule terms */
471 fr
->dadx
[n
++] = prod
*icf6
;
472 fr
->dadx
[n
++] = prod_ai
*icf6
;
474 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
479 /* Main part, no exclusions */
480 for (j
= nj1
; j
< nj2
; j
++)
484 /* load j atom coordinates */
489 /* Calculate distance */
493 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
495 /* Calculate 1/r and 1/r2 */
496 rinv
= gmx::invsqrt(rsq
);
501 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]];
505 ratio
= rsq
/ (rvdw
* rvdw
);
506 vaj
= born
->vsolv
[k
];
508 if (ratio
> STILL_P5INV
)
515 theta
= ratio
*STILL_PIP5
;
517 term
= 0.5*(1.0-cosq
);
519 sinq
= 1.0 - cosq
*cosq
;
520 dccf
= 2.0*term
*sinq
*gmx::invsqrt(sinq
)*theta
;
525 icf6
= (4*ccf
-dccf
)*idr6
;
527 born
->gpol_still_work
[k
] += prod_ai
*icf4
;
530 /* Save ai->aj and aj->ai chain rule terms */
531 fr
->dadx
[n
++] = prod
*icf6
;
532 fr
->dadx
[n
++] = prod_ai
*icf6
;
534 born
->gpol_still_work
[i
] += gpi
;
537 /* Parallel summations would go here if ever implemented with DD */
539 /* Calculate the radii */
540 for (i
= 0; i
< natoms
; i
++)
542 if (born
->use
[i
] != 0)
544 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
546 born
->bRad
[i
] = factor
*gmx::invsqrt(gpi2
);
547 fr
->invsqrta
[i
] = gmx::invsqrt(born
->bRad
[i
]);
557 genborn_allvsall_calc_hct_obc_radii(t_forcerec
* fr
,
559 gmx_genborn_t
* born
,
561 gmx_localtop_t
* top
,
565 gmx_allvsallgb2_data_t
*aadata
;
577 real rai
, doffset
, rai_inv
, rai_inv2
, sk_ai
, sk2_ai
, sum_ai
;
578 real dr
, sk
, lij
, dlij
, lij2
, lij3
, uij2
, uij3
, diff2
, uij
, log_term
;
579 real lij_inv
, sk2
, sk2_rinv
, tmp
, t1
, t2
, t3
, raj_inv
, sum_ai2
, sum_ai3
, tsum
;
584 natoms
= mdatoms
->nr
;
586 ni1
= mdatoms
->homenr
;
589 doffset
= born
->gb_doffset
;
591 aadata
= *((gmx_allvsallgb2_data_t
**)work
);
593 if (aadata
== nullptr)
595 genborn_allvsall_setup(&aadata
, top
->idef
.il
, mdatoms
->nr
,
597 *((gmx_allvsallgb2_data_t
**)work
) = aadata
;
600 for (i
= 0; i
< born
->nr
; i
++)
602 born
->gpol_hct_work
[i
] = 0;
605 for (i
= ni0
; i
< ni1
; i
++)
607 /* We assume shifts are NOT used for all-vs-all interactions */
609 /* Load i atom data */
614 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]]-doffset
;
617 sk_ai
= born
->param
[i
];
618 sk2_ai
= sk_ai
*sk_ai
;
622 /* Load limits for loop over neighbors */
623 nj0
= aadata
->jindex_gb
[3*i
];
624 nj1
= aadata
->jindex_gb
[3*i
+1];
625 nj2
= aadata
->jindex_gb
[3*i
+2];
627 mask
= aadata
->exclusion_mask_gb
[i
];
629 /* Prologue part, including exclusion mask */
630 for (j
= nj0
; j
< nj1
; j
++, mask
++)
636 /* load j atom coordinates */
641 /* Calculate distance */
645 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
647 /* Calculate 1/r and 1/r2 */
648 rinv
= gmx::invsqrt(rsq
);
651 /* sk is precalculated in init_gb() */
653 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]]-doffset
;
655 /* aj -> ai interaction */
677 lij_inv
= gmx::invsqrt(lij2
);
680 prod
= 0.25*sk2_rinv
;
682 log_term
= std::log(uij
*lij_inv
);
683 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
684 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
688 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
691 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
692 t2
= -0.5*uij2
- prod
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
694 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
696 dadxi
= (dlij
*t1
+t2
+t3
)*rinv
;
705 /* ai -> aj interaction */
706 if (raj
< dr
+ sk_ai
)
708 lij
= 1.0/(dr
-sk_ai
);
721 uij
= 1.0/(dr
+sk_ai
);
727 lij_inv
= gmx::invsqrt(lij2
);
728 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
730 prod
= 0.25 * sk2_rinv
;
732 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
733 log_term
= std::log(uij
*lij_inv
);
735 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
739 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
742 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
743 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
744 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
746 dadxj
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
748 born
->gpol_hct_work
[k
] += 0.5*tmp
;
754 fr
->dadx
[n
++] = dadxi
;
755 fr
->dadx
[n
++] = dadxj
;
760 /* Main part, no exclusions */
761 for (j
= nj1
; j
< nj2
; j
++)
765 /* load j atom coordinates */
770 /* Calculate distance */
774 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
776 /* Calculate 1/r and 1/r2 */
777 rinv
= gmx::invsqrt(rsq
);
780 /* sk is precalculated in init_gb() */
782 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]]-doffset
;
784 /* aj -> ai interaction */
804 lij_inv
= gmx::invsqrt(lij2
);
807 prod
= 0.25*sk2_rinv
;
809 log_term
= std::log(uij
*lij_inv
);
810 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
811 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
815 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
819 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
820 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
821 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
823 dadxi
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
832 /* ai -> aj interaction */
833 if (raj
< dr
+ sk_ai
)
835 lij
= 1.0/(dr
-sk_ai
);
848 uij
= 1.0/(dr
+sk_ai
);
854 lij_inv
= gmx::invsqrt(lij2
);
855 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
857 prod
= 0.25 * sk2_rinv
;
859 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
860 log_term
= std::log(uij
*lij_inv
);
862 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
866 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
869 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
870 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
871 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
873 dadxj
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
875 born
->gpol_hct_work
[k
] += 0.5*tmp
;
881 fr
->dadx
[n
++] = dadxi
;
882 fr
->dadx
[n
++] = dadxj
;
884 born
->gpol_hct_work
[i
] += sum_ai
;
887 /* Parallel summations would go here if ever implemented with DD */
889 if (gb_algorithm
== egbHCT
)
892 for (i
= 0; i
< natoms
; i
++)
894 if (born
->use
[i
] != 0)
896 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]]-born
->gb_doffset
;
897 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
898 min_rad
= rai
+ born
->gb_doffset
;
901 born
->bRad
[i
] = std::max(rad
, min_rad
);
902 fr
->invsqrta
[i
] = gmx::invsqrt(born
->bRad
[i
]);
910 /* Calculate the radii */
911 for (i
= 0; i
< natoms
; i
++)
913 if (born
->use
[i
] != 0)
915 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]];
919 sum_ai
= rai
* born
->gpol_hct_work
[i
];
920 sum_ai2
= sum_ai
* sum_ai
;
921 sum_ai3
= sum_ai2
* sum_ai
;
923 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
924 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
925 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
927 fr
->invsqrta
[i
] = gmx::invsqrt(born
->bRad
[i
]);
929 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
930 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
942 genborn_allvsall_calc_chainrule(t_forcerec
* fr
,
944 gmx_genborn_t
* born
,
950 gmx_allvsallgb2_data_t
*aadata
;
963 real rbai
, rbaj
, fgb
, fgb_ai
, rbi
;
967 natoms
= mdatoms
->nr
;
969 ni1
= mdatoms
->homenr
;
972 aadata
= (gmx_allvsallgb2_data_t
*)work
;
977 /* Loop to get the proper form for the Born radius term */
978 if (gb_algorithm
== egbSTILL
)
980 for (i
= 0; i
< natoms
; i
++)
983 rb
[i
] = (2 * rbi
* rbi
* fr
->dvda
[i
])/ONE_4PI_EPS0
;
986 else if (gb_algorithm
== egbHCT
)
988 for (i
= 0; i
< natoms
; i
++)
991 rb
[i
] = rbi
* rbi
* fr
->dvda
[i
];
994 else if (gb_algorithm
== egbOBC
)
996 for (idx
= 0; idx
< natoms
; idx
++)
998 rbi
= born
->bRad
[idx
];
999 rb
[idx
] = rbi
* rbi
* born
->drobc
[idx
] * fr
->dvda
[idx
];
1003 for (i
= ni0
; i
< ni1
; i
++)
1005 /* We assume shifts are NOT used for all-vs-all interactions */
1007 /* Load i atom data */
1018 /* Load limits for loop over neighbors */
1019 nj0
= aadata
->jindex_gb
[3*i
];
1020 nj1
= aadata
->jindex_gb
[3*i
+1];
1021 nj2
= aadata
->jindex_gb
[3*i
+2];
1023 mask
= aadata
->exclusion_mask_gb
[i
];
1025 /* Prologue part, including exclusion mask */
1026 for (j
= nj0
; j
< nj1
; j
++, mask
++)
1032 /* load j atom coordinates */
1037 /* Calculate distance */
1044 fgb
= rbai
*dadx
[n
++];
1045 fgb_ai
= rbaj
*dadx
[n
++];
1047 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1058 /* Update force on atom aj */
1059 f
[3*k
] = f
[3*k
] - tx
;
1060 f
[3*k
+1] = f
[3*k
+1] - ty
;
1061 f
[3*k
+2] = f
[3*k
+2] - tz
;
1065 /* Main part, no exclusions */
1066 for (j
= nj1
; j
< nj2
; j
++)
1070 /* load j atom coordinates */
1075 /* Calculate distance */
1082 fgb
= rbai
*dadx
[n
++];
1083 fgb_ai
= rbaj
*dadx
[n
++];
1085 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1096 /* Update force on atom aj */
1097 f
[3*k
] = f
[3*k
] - tx
;
1098 f
[3*k
+1] = f
[3*k
+1] - ty
;
1099 f
[3*k
+2] = f
[3*k
+2] - tz
;
1101 /* Update force and shift forces on atom ai */
1102 f
[3*i
] = f
[3*i
] + fix
;
1103 f
[3*i
+1] = f
[3*i
+1] + fiy
;
1104 f
[3*i
+2] = f
[3*i
+2] + fiz
;