2 * This source code is part of
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2009, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
35 #include "types/simple.h"
44 #include "genborn_allvsall.h"
50 int ** exclusion_mask_gb
;
52 gmx_allvsallgb2_data_t
;
55 calc_maxoffset(int i
,int natoms
)
59 if ((natoms
% 2) == 1)
61 /* Odd number of atoms, easy */
64 else if ((natoms
% 4) == 0)
66 /* Multiple of four is hard */
107 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t
* aadata
,
121 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
122 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
123 * whether they should interact or not.
125 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
126 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
127 * we create a jindex array with three elements per i atom: the starting point, the point to
128 * which we need to check exclusions, and the end point.
129 * This way we only have to allocate a short exclusion mask per i atom.
132 /* Allocate memory for jindex arrays */
133 snew(aadata
->jindex_gb
,3*natoms
);
135 /* Pointer to lists with exclusion masks */
136 snew(aadata
->exclusion_mask_gb
,natoms
);
138 for(i
=0;i
<natoms
;i
++)
141 aadata
->jindex_gb
[3*i
] = i
+1;
142 max_offset
= calc_maxoffset(i
,natoms
);
144 /* first check the max range of atoms to EXCLUDE */
148 for(j
=0;j
<ilist
[F_GB12
].nr
;j
+=3)
150 a1
= ilist
[F_GB12
].iatoms
[j
+1];
151 a2
= ilist
[F_GB12
].iatoms
[j
+2];
165 if(k
>0 && k
<=max_offset
)
167 max_excl_offset
= (k
> max_excl_offset
) ? k
: max_excl_offset
;
173 for(j
=0;j
<ilist
[F_GB13
].nr
;j
+=3)
175 a1
= ilist
[F_GB13
].iatoms
[j
+1];
176 a2
= ilist
[F_GB13
].iatoms
[j
+2];
191 if(k
>0 && k
<=max_offset
)
193 max_excl_offset
= (k
> max_excl_offset
) ? k
: max_excl_offset
;
199 for(j
=0;j
<ilist
[F_GB14
].nr
;j
+=3)
201 a1
= ilist
[F_GB14
].iatoms
[j
+1];
202 a2
= ilist
[F_GB14
].iatoms
[j
+2];
217 if(k
>0 && k
<=max_offset
)
219 max_excl_offset
= (k
> max_excl_offset
) ? k
: max_excl_offset
;
223 max_excl_offset
= (max_offset
< max_excl_offset
) ? max_offset
: max_excl_offset
;
225 aadata
->jindex_gb
[3*i
+1] = i
+1+max_excl_offset
;
227 snew(aadata
->exclusion_mask_gb
[i
],max_excl_offset
);
229 /* Include everything by default */
230 for(j
=0;j
<max_excl_offset
;j
++)
232 /* Use all-ones to mark interactions that should be present, compatible with SSE */
233 aadata
->exclusion_mask_gb
[i
][j
] = 0xFFFFFFFF;
235 /* Go through exclusions again */
238 for(j
=0;j
<ilist
[F_GB12
].nr
;j
+=3)
240 a1
= ilist
[F_GB12
].iatoms
[j
+1];
241 a2
= ilist
[F_GB12
].iatoms
[j
+2];
255 if(k
>0 && k
<=max_offset
)
257 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
263 for(j
=0;j
<ilist
[F_GB13
].nr
;j
+=3)
265 a1
= ilist
[F_GB13
].iatoms
[j
+1];
266 a2
= ilist
[F_GB13
].iatoms
[j
+2];
280 if(k
>0 && k
<=max_offset
)
282 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
288 for(j
=0;j
<ilist
[F_GB14
].nr
;j
+=3)
290 a1
= ilist
[F_GB14
].iatoms
[j
+1];
291 a2
= ilist
[F_GB14
].iatoms
[j
+2];
305 if(k
>0 && k
<=max_offset
)
307 aadata
->exclusion_mask_gb
[i
][k
-1] = 0;
315 aadata
->jindex_gb
[3*i
+2] = i
+1+max_offset
;
321 genborn_allvsall_setup(gmx_allvsallgb2_data_t
** p_aadata
,
329 gmx_allvsallgb2_data_t
*aadata
;
335 setup_gb_exclusions_and_indices(aadata
,ilist
,natoms
,bInclude12
,bInclude13
,bInclude14
);
341 genborn_allvsall_calc_still_radii(t_forcerec
* fr
,
343 gmx_genborn_t
* born
,
344 gmx_localtop_t
* top
,
349 gmx_allvsallgb2_data_t
*aadata
;
364 real vaj
,ccf
,dccf
,theta
,cosq
;
365 real term
,prod
,icf4
,icf6
,gpi2
,factor
,sinq
;
367 natoms
= mdatoms
->nr
;
368 ni0
= mdatoms
->start
;
369 ni1
= mdatoms
->start
+mdatoms
->homenr
;
370 factor
= 0.5*ONE_4PI_EPS0
;
373 aadata
= *((gmx_allvsallgb2_data_t
**)work
);
377 genborn_allvsall_setup(&aadata
,top
->idef
.il
,mdatoms
->nr
,
379 *((gmx_allvsallgb2_data_t
**)work
) = aadata
;
383 for(i
=0;i
<born
->nr
;i
++)
385 born
->gpol_still_work
[i
] = 0;
389 for(i
=ni0
; i
<ni1
; i
++)
391 /* We assume shifts are NOT used for all-vs-all interactions */
393 /* Load i atom data */
400 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]];
401 vai
= born
->vsolv
[i
];
402 prod_ai
= STILL_P4
*vai
;
404 /* Load limits for loop over neighbors */
405 nj0
= aadata
->jindex_gb
[3*i
];
406 nj1
= aadata
->jindex_gb
[3*i
+1];
407 nj2
= aadata
->jindex_gb
[3*i
+2];
409 mask
= aadata
->exclusion_mask_gb
[i
];
411 /* Prologue part, including exclusion mask */
412 for(j
=nj0
; j
<nj1
; j
++,mask
++)
418 /* load j atom coordinates */
423 /* Calculate distance */
427 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
429 /* Calculate 1/r and 1/r2 */
430 rinv
= gmx_invsqrt(rsq
);
435 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]];
439 ratio
= rsq
/ (rvdw
* rvdw
);
440 vaj
= born
->vsolv
[k
];
443 if(ratio
>STILL_P5INV
)
450 theta
= ratio
*STILL_PIP5
;
452 term
= 0.5*(1.0-cosq
);
454 sinq
= 1.0 - cosq
*cosq
;
455 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
460 icf6
= (4*ccf
-dccf
)*idr6
;
462 born
->gpol_still_work
[k
] += prod_ai
*icf4
;
465 /* Save ai->aj and aj->ai chain rule terms */
466 fr
->dadx
[n
++] = prod
*icf6
;
467 fr
->dadx
[n
++] = prod_ai
*icf6
;
469 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
474 /* Main part, no exclusions */
475 for(j
=nj1
; j
<nj2
; j
++)
479 /* load j atom coordinates */
484 /* Calculate distance */
488 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
490 /* Calculate 1/r and 1/r2 */
491 rinv
= gmx_invsqrt(rsq
);
496 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]];
500 ratio
= rsq
/ (rvdw
* rvdw
);
501 vaj
= born
->vsolv
[k
];
503 if(ratio
>STILL_P5INV
)
510 theta
= ratio
*STILL_PIP5
;
512 term
= 0.5*(1.0-cosq
);
514 sinq
= 1.0 - cosq
*cosq
;
515 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
520 icf6
= (4*ccf
-dccf
)*idr6
;
522 born
->gpol_still_work
[k
] += prod_ai
*icf4
;
525 /* Save ai->aj and aj->ai chain rule terms */
526 fr
->dadx
[n
++] = prod
*icf6
;
527 fr
->dadx
[n
++] = prod_ai
*icf6
;
529 born
->gpol_still_work
[i
]+=gpi
;
532 /* Parallel summations */
535 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
538 /* Calculate the radii */
539 for(i
=0;i
<natoms
;i
++)
541 if(born
->use
[i
] != 0)
543 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
545 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
546 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
556 genborn_allvsall_calc_hct_obc_radii(t_forcerec
* fr
,
558 gmx_genborn_t
* born
,
560 gmx_localtop_t
* top
,
565 gmx_allvsallgb2_data_t
*aadata
;
577 real rai
,doffset
,rai_inv
,rai_inv2
,sk_ai
,sk2_ai
,sum_ai
;
578 real dr
,sk
,lij
,dlij
,lij2
,lij3
,uij2
,uij3
,diff2
,uij
,log_term
;
579 real lij_inv
,sk2
,sk2_rinv
,tmp
,t1
,t2
,t3
,raj_inv
,sum_ai2
,sum_ai3
,tsum
;
584 natoms
= mdatoms
->nr
;
585 ni0
= mdatoms
->start
;
586 ni1
= mdatoms
->start
+mdatoms
->homenr
;
591 doffset
= born
->gb_doffset
;
593 aadata
= *((gmx_allvsallgb2_data_t
**)work
);
597 genborn_allvsall_setup(&aadata
,top
->idef
.il
,mdatoms
->nr
,
599 *((gmx_allvsallgb2_data_t
**)work
) = aadata
;
602 for(i
=0;i
<born
->nr
;i
++)
604 born
->gpol_hct_work
[i
] = 0;
607 for(i
=ni0
; i
<ni1
; i
++)
609 /* We assume shifts are NOT used for all-vs-all interactions */
611 /* Load i atom data */
616 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]]-doffset
;
619 sk_ai
= born
->param
[i
];
620 sk2_ai
= sk_ai
*sk_ai
;
624 /* Load limits for loop over neighbors */
625 nj0
= aadata
->jindex_gb
[3*i
];
626 nj1
= aadata
->jindex_gb
[3*i
+1];
627 nj2
= aadata
->jindex_gb
[3*i
+2];
629 mask
= aadata
->exclusion_mask_gb
[i
];
631 /* Prologue part, including exclusion mask */
632 for(j
=nj0
; j
<nj1
; j
++,mask
++)
638 /* load j atom coordinates */
643 /* Calculate distance */
647 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
649 /* Calculate 1/r and 1/r2 */
650 rinv
= gmx_invsqrt(rsq
);
653 /* sk is precalculated in init_gb() */
655 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]]-doffset
;
657 /* aj -> ai interaction */
679 lij_inv
= gmx_invsqrt(lij2
);
682 prod
= 0.25*sk2_rinv
;
684 log_term
= log(uij
*lij_inv
);
685 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
686 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
690 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
693 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
694 t2
= -0.5*uij2
- prod
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
696 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
698 dadxi
= (dlij
*t1
+t2
+t3
)*rinv
;
707 /* ai -> aj interaction */
710 lij
= 1.0/(dr
-sk_ai
);
723 uij
= 1.0/(dr
+sk_ai
);
729 lij_inv
= gmx_invsqrt(lij2
);
730 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
732 prod
= 0.25 * sk2_rinv
;
734 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
735 log_term
= log(uij
*lij_inv
);
737 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
741 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
744 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
745 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
746 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
748 dadxj
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
750 born
->gpol_hct_work
[k
] += 0.5*tmp
;
756 fr
->dadx
[n
++] = dadxi
;
757 fr
->dadx
[n
++] = dadxj
;
762 /* Main part, no exclusions */
763 for(j
=nj1
; j
<nj2
; j
++)
767 /* load j atom coordinates */
772 /* Calculate distance */
776 rsq
= dx
*dx
+dy
*dy
+dz
*dz
;
778 /* Calculate 1/r and 1/r2 */
779 rinv
= gmx_invsqrt(rsq
);
782 /* sk is precalculated in init_gb() */
784 raj
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[k
]]-doffset
;
786 /* aj -> ai interaction */
806 lij_inv
= gmx_invsqrt(lij2
);
809 prod
= 0.25*sk2_rinv
;
811 log_term
= log(uij
*lij_inv
);
812 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
813 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
817 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
821 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
822 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
823 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
825 dadxi
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
834 /* ai -> aj interaction */
837 lij
= 1.0/(dr
-sk_ai
);
850 uij
= 1.0/(dr
+sk_ai
);
856 lij_inv
= gmx_invsqrt(lij2
);
857 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
859 prod
= 0.25 * sk2_rinv
;
861 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
862 log_term
= log(uij
*lij_inv
);
864 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
868 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
871 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
872 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
873 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
875 dadxj
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
877 born
->gpol_hct_work
[k
] += 0.5*tmp
;
883 fr
->dadx
[n
++] = dadxi
;
884 fr
->dadx
[n
++] = dadxj
;
886 born
->gpol_hct_work
[i
]+=sum_ai
;
889 /* Parallel summations */
892 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
895 if(gb_algorithm
==egbHCT
)
898 for(i
=0;i
<natoms
;i
++)
900 if(born
->use
[i
] != 0)
902 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]]-born
->gb_doffset
;
903 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
904 min_rad
= rai
+ born
->gb_doffset
;
907 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
908 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
916 /* Calculate the radii */
917 for(i
=0;i
<natoms
;i
++)
919 if(born
->use
[i
] != 0)
921 rai
= top
->atomtypes
.gb_radius
[mdatoms
->typeA
[i
]];
925 sum_ai
= rai
* born
->gpol_hct_work
[i
];
926 sum_ai2
= sum_ai
* sum_ai
;
927 sum_ai3
= sum_ai2
* sum_ai
;
929 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
930 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
931 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
933 fr
->invsqrta
[i
]=gmx_invsqrt(born
->bRad
[i
]);
935 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
936 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
948 genborn_allvsall_calc_chainrule(t_forcerec
* fr
,
950 gmx_genborn_t
* born
,
956 gmx_allvsallgb2_data_t
*aadata
;
969 real rbai
,rbaj
,fgb
,fgb_ai
,rbi
;
973 natoms
= mdatoms
->nr
;
974 ni0
= mdatoms
->start
;
975 ni1
= mdatoms
->start
+mdatoms
->homenr
;
978 aadata
= (gmx_allvsallgb2_data_t
*)work
;
983 /* Loop to get the proper form for the Born radius term */
984 if(gb_algorithm
==egbSTILL
)
986 for(i
=0;i
<natoms
;i
++)
989 rb
[i
] = (2 * rbi
* rbi
* fr
->dvda
[i
])/ONE_4PI_EPS0
;
992 else if(gb_algorithm
==egbHCT
)
994 for(i
=0;i
<natoms
;i
++)
997 rb
[i
] = rbi
* rbi
* fr
->dvda
[i
];
1000 else if(gb_algorithm
==egbOBC
)
1002 for(idx
=0;idx
<natoms
;idx
++)
1004 rbi
= born
->bRad
[idx
];
1005 rb
[idx
] = rbi
* rbi
* born
->drobc
[idx
] * fr
->dvda
[idx
];
1009 for(i
=ni0
; i
<ni1
; i
++)
1011 /* We assume shifts are NOT used for all-vs-all interactions */
1013 /* Load i atom data */
1024 /* Load limits for loop over neighbors */
1025 nj0
= aadata
->jindex_gb
[3*i
];
1026 nj1
= aadata
->jindex_gb
[3*i
+1];
1027 nj2
= aadata
->jindex_gb
[3*i
+2];
1029 mask
= aadata
->exclusion_mask_gb
[i
];
1031 /* Prologue part, including exclusion mask */
1032 for(j
=nj0
; j
<nj1
; j
++,mask
++)
1038 /* load j atom coordinates */
1043 /* Calculate distance */
1050 fgb
= rbai
*dadx
[n
++];
1051 fgb_ai
= rbaj
*dadx
[n
++];
1053 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1064 /* Update force on atom aj */
1065 f
[3*k
] = f
[3*k
] - tx
;
1066 f
[3*k
+1] = f
[3*k
+1] - ty
;
1067 f
[3*k
+2] = f
[3*k
+2] - tz
;
1071 /* Main part, no exclusions */
1072 for(j
=nj1
; j
<nj2
; j
++)
1076 /* load j atom coordinates */
1081 /* Calculate distance */
1088 fgb
= rbai
*dadx
[n
++];
1089 fgb_ai
= rbaj
*dadx
[n
++];
1091 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1102 /* Update force on atom aj */
1103 f
[3*k
] = f
[3*k
] - tx
;
1104 f
[3*k
+1] = f
[3*k
+1] - ty
;
1105 f
[3*k
+2] = f
[3*k
+2] - tz
;
1107 /* Update force and shift forces on atom ai */
1108 f
[3*i
] = f
[3*i
] + fix
;
1109 f
[3*i
+1] = f
[3*i
+1] + fiy
;
1110 f
[3*i
+2] = f
[3*i
+2] + fiz
;