Generalize IForceProvider
[gromacs.git] / src / gromacs / mdlib / genborn_allvsall.cpp
blob1d96860895bcb9c0fe0e05868ccd7cdeeb8f634a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010,2014,2015,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "genborn_allvsall.h"
41 #include <cmath>
43 #include <algorithm>
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/math/functions.h"
47 #include "gromacs/math/units.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/mdlib/genborn.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/topology/topology.h"
54 #include "gromacs/utility/smalloc.h"
57 typedef struct
59 int * jindex_gb;
60 int ** exclusion_mask_gb;
62 gmx_allvsallgb2_data_t;
64 static int
65 calc_maxoffset(int i, int natoms)
67 int maxoffset;
69 if ((natoms % 2) == 1)
71 /* Odd number of atoms, easy */
72 maxoffset = natoms/2;
74 else if ((natoms % 4) == 0)
76 /* Multiple of four is hard */
77 if (i < natoms/2)
79 if ((i % 2) == 0)
81 maxoffset = natoms/2;
83 else
85 maxoffset = natoms/2-1;
88 else
90 if ((i % 2) == 1)
92 maxoffset = natoms/2;
94 else
96 maxoffset = natoms/2-1;
100 else
102 /* natoms/2 = odd */
103 if ((i % 2) == 0)
105 maxoffset = natoms/2;
107 else
109 maxoffset = natoms/2-1;
113 return maxoffset;
116 static void
117 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
118 t_ilist * ilist,
119 int natoms,
120 gmx_bool bInclude12,
121 gmx_bool bInclude13,
122 gmx_bool bInclude14)
124 int i, j, k;
125 int a1, a2;
126 int max_offset;
127 int max_excl_offset;
129 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
130 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
131 * whether they should interact or not.
133 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
134 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
135 * we create a jindex array with three elements per i atom: the starting point, the point to
136 * which we need to check exclusions, and the end point.
137 * This way we only have to allocate a short exclusion mask per i atom.
140 /* Allocate memory for jindex arrays */
141 snew(aadata->jindex_gb, 3*natoms);
143 /* Pointer to lists with exclusion masks */
144 snew(aadata->exclusion_mask_gb, natoms);
146 for (i = 0; i < natoms; i++)
148 /* Start */
149 aadata->jindex_gb[3*i] = i+1;
150 max_offset = calc_maxoffset(i, natoms);
152 /* first check the max range of atoms to EXCLUDE */
153 max_excl_offset = 0;
154 if (!bInclude12)
156 for (j = 0; j < ilist[F_GB12].nr; j += 3)
158 a1 = ilist[F_GB12].iatoms[j+1];
159 a2 = ilist[F_GB12].iatoms[j+2];
161 if (a1 == i)
163 k = a2-a1;
165 else if (a2 == i)
167 k = a1+natoms-a2;
169 else
171 continue;
173 if (k > 0 && k <= max_offset)
175 max_excl_offset = std::max(k, max_excl_offset);
179 if (!bInclude13)
181 for (j = 0; j < ilist[F_GB13].nr; j += 3)
183 a1 = ilist[F_GB13].iatoms[j+1];
184 a2 = ilist[F_GB13].iatoms[j+2];
187 if (a1 == i)
189 k = a2-a1;
191 else if (a2 == i)
193 k = a1+natoms-a2;
195 else
197 continue;
199 if (k > 0 && k <= max_offset)
201 max_excl_offset = std::max(k, max_excl_offset);
205 if (!bInclude14)
207 for (j = 0; j < ilist[F_GB14].nr; j += 3)
209 a1 = ilist[F_GB14].iatoms[j+1];
210 a2 = ilist[F_GB14].iatoms[j+2];
213 if (a1 == i)
215 k = a2-a1;
217 else if (a2 == i)
219 k = a1+natoms-a2;
221 else
223 continue;
225 if (k > 0 && k <= max_offset)
227 max_excl_offset = std::max(k, max_excl_offset);
231 max_excl_offset = std::min(max_offset, max_excl_offset);
233 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
235 snew(aadata->exclusion_mask_gb[i], max_excl_offset);
237 /* Include everything by default */
238 for (j = 0; j < max_excl_offset; j++)
240 /* Use all-ones to mark interactions that should be present, compatible with SSE */
241 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
243 /* Go through exclusions again */
244 if (!bInclude12)
246 for (j = 0; j < ilist[F_GB12].nr; j += 3)
248 a1 = ilist[F_GB12].iatoms[j+1];
249 a2 = ilist[F_GB12].iatoms[j+2];
251 if (a1 == i)
253 k = a2-a1;
255 else if (a2 == i)
257 k = a1+natoms-a2;
259 else
261 continue;
263 if (k > 0 && k <= max_offset)
265 aadata->exclusion_mask_gb[i][k-1] = 0;
269 if (!bInclude13)
271 for (j = 0; j < ilist[F_GB13].nr; j += 3)
273 a1 = ilist[F_GB13].iatoms[j+1];
274 a2 = ilist[F_GB13].iatoms[j+2];
276 if (a1 == i)
278 k = a2-a1;
280 else if (a2 == i)
282 k = a1+natoms-a2;
284 else
286 continue;
288 if (k > 0 && k <= max_offset)
290 aadata->exclusion_mask_gb[i][k-1] = 0;
294 if (!bInclude14)
296 for (j = 0; j < ilist[F_GB14].nr; j += 3)
298 a1 = ilist[F_GB14].iatoms[j+1];
299 a2 = ilist[F_GB14].iatoms[j+2];
301 if (a1 == i)
303 k = a2-a1;
305 else if (a2 == i)
307 k = a1+natoms-a2;
309 else
311 continue;
313 if (k > 0 && k <= max_offset)
315 aadata->exclusion_mask_gb[i][k-1] = 0;
320 /* End */
322 /* End */
323 aadata->jindex_gb[3*i+2] = i+1+max_offset;
328 static void
329 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
330 t_ilist * ilist,
331 int natoms,
332 gmx_bool bInclude12,
333 gmx_bool bInclude13,
334 gmx_bool bInclude14)
336 gmx_allvsallgb2_data_t *aadata;
338 snew(aadata, 1);
339 *p_aadata = aadata;
341 setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
347 genborn_allvsall_calc_still_radii(t_forcerec * fr,
348 t_mdatoms * mdatoms,
349 gmx_genborn_t * born,
350 gmx_localtop_t * top,
351 real * x,
352 void * work)
354 gmx_allvsallgb2_data_t *aadata;
355 int natoms;
356 int ni0, ni1;
357 int nj0, nj1, nj2;
358 int i, j, k, n;
359 int * mask;
361 real ix, iy, iz;
362 real jx, jy, jz;
363 real dx, dy, dz;
364 real rsq, rinv;
365 real gpi, rai, vai;
366 real prod_ai;
367 real irsq, idr4, idr6;
368 real raj, rvdw, ratio;
369 real vaj, ccf, dccf, theta, cosq;
370 real term, prod, icf4, icf6, gpi2, factor, sinq;
372 natoms = mdatoms->nr;
373 ni0 = 0;
374 ni1 = mdatoms->homenr;
375 factor = 0.5*ONE_4PI_EPS0;
376 n = 0;
378 aadata = *((gmx_allvsallgb2_data_t **)work);
380 if (aadata == nullptr)
382 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
383 FALSE, FALSE, TRUE);
384 *((gmx_allvsallgb2_data_t **)work) = aadata;
388 for (i = 0; i < born->nr; i++)
390 born->gpol_still_work[i] = 0;
394 for (i = ni0; i < ni1; i++)
396 /* We assume shifts are NOT used for all-vs-all interactions */
398 /* Load i atom data */
399 ix = x[3*i];
400 iy = x[3*i+1];
401 iz = x[3*i+2];
403 gpi = 0.0;
405 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
406 vai = born->vsolv[i];
407 prod_ai = STILL_P4*vai;
409 /* Load limits for loop over neighbors */
410 nj0 = aadata->jindex_gb[3*i];
411 nj1 = aadata->jindex_gb[3*i+1];
412 nj2 = aadata->jindex_gb[3*i+2];
414 mask = aadata->exclusion_mask_gb[i];
416 /* Prologue part, including exclusion mask */
417 for (j = nj0; j < nj1; j++, mask++)
419 if (*mask != 0)
421 k = j%natoms;
423 /* load j atom coordinates */
424 jx = x[3*k];
425 jy = x[3*k+1];
426 jz = x[3*k+2];
428 /* Calculate distance */
429 dx = ix - jx;
430 dy = iy - jy;
431 dz = iz - jz;
432 rsq = dx*dx+dy*dy+dz*dz;
434 /* Calculate 1/r and 1/r2 */
435 rinv = gmx::invsqrt(rsq);
436 irsq = rinv*rinv;
437 idr4 = irsq*irsq;
438 idr6 = idr4*irsq;
440 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
442 rvdw = rai + raj;
444 ratio = rsq / (rvdw * rvdw);
445 vaj = born->vsolv[k];
448 if (ratio > STILL_P5INV)
450 ccf = 1.0;
451 dccf = 0.0;
453 else
455 theta = ratio*STILL_PIP5;
456 cosq = cos(theta);
457 term = 0.5*(1.0-cosq);
458 ccf = term*term;
459 sinq = 1.0 - cosq*cosq;
460 dccf = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
463 prod = STILL_P4*vaj;
464 icf4 = ccf*idr4;
465 icf6 = (4*ccf-dccf)*idr6;
467 born->gpol_still_work[k] += prod_ai*icf4;
468 gpi = gpi+prod*icf4;
470 /* Save ai->aj and aj->ai chain rule terms */
471 fr->dadx[n++] = prod*icf6;
472 fr->dadx[n++] = prod_ai*icf6;
474 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
479 /* Main part, no exclusions */
480 for (j = nj1; j < nj2; j++)
482 k = j%natoms;
484 /* load j atom coordinates */
485 jx = x[3*k];
486 jy = x[3*k+1];
487 jz = x[3*k+2];
489 /* Calculate distance */
490 dx = ix - jx;
491 dy = iy - jy;
492 dz = iz - jz;
493 rsq = dx*dx+dy*dy+dz*dz;
495 /* Calculate 1/r and 1/r2 */
496 rinv = gmx::invsqrt(rsq);
497 irsq = rinv*rinv;
498 idr4 = irsq*irsq;
499 idr6 = idr4*irsq;
501 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
503 rvdw = rai + raj;
505 ratio = rsq / (rvdw * rvdw);
506 vaj = born->vsolv[k];
508 if (ratio > STILL_P5INV)
510 ccf = 1.0;
511 dccf = 0.0;
513 else
515 theta = ratio*STILL_PIP5;
516 cosq = cos(theta);
517 term = 0.5*(1.0-cosq);
518 ccf = term*term;
519 sinq = 1.0 - cosq*cosq;
520 dccf = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
523 prod = STILL_P4*vaj;
524 icf4 = ccf*idr4;
525 icf6 = (4*ccf-dccf)*idr6;
527 born->gpol_still_work[k] += prod_ai*icf4;
528 gpi = gpi+prod*icf4;
530 /* Save ai->aj and aj->ai chain rule terms */
531 fr->dadx[n++] = prod*icf6;
532 fr->dadx[n++] = prod_ai*icf6;
534 born->gpol_still_work[i] += gpi;
537 /* Parallel summations would go here if ever implemented with DD */
539 /* Calculate the radii */
540 for (i = 0; i < natoms; i++)
542 if (born->use[i] != 0)
544 gpi = born->gpol[i]+born->gpol_still_work[i];
545 gpi2 = gpi * gpi;
546 born->bRad[i] = factor*gmx::invsqrt(gpi2);
547 fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
551 return 0;
557 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
558 t_mdatoms * mdatoms,
559 gmx_genborn_t * born,
560 int gb_algorithm,
561 gmx_localtop_t * top,
562 real * x,
563 void * work)
565 gmx_allvsallgb2_data_t *aadata;
566 int natoms;
567 int ni0, ni1;
568 int nj0, nj1, nj2;
569 int i, j, k, n;
570 int * mask;
572 real ix, iy, iz;
573 real jx, jy, jz;
574 real dx, dy, dz;
575 real rsq, rinv;
576 real prod, raj;
577 real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
578 real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
579 real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
580 real tchain;
581 real dadxi, dadxj;
582 real rad, min_rad;
584 natoms = mdatoms->nr;
585 ni0 = 0;
586 ni1 = mdatoms->homenr;
588 n = 0;
589 doffset = born->gb_doffset;
591 aadata = *((gmx_allvsallgb2_data_t **)work);
593 if (aadata == nullptr)
595 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
596 TRUE, TRUE, TRUE);
597 *((gmx_allvsallgb2_data_t **)work) = aadata;
600 for (i = 0; i < born->nr; i++)
602 born->gpol_hct_work[i] = 0;
605 for (i = ni0; i < ni1; i++)
607 /* We assume shifts are NOT used for all-vs-all interactions */
609 /* Load i atom data */
610 ix = x[3*i];
611 iy = x[3*i+1];
612 iz = x[3*i+2];
614 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
615 rai_inv = 1.0/rai;
617 sk_ai = born->param[i];
618 sk2_ai = sk_ai*sk_ai;
620 sum_ai = 0;
622 /* Load limits for loop over neighbors */
623 nj0 = aadata->jindex_gb[3*i];
624 nj1 = aadata->jindex_gb[3*i+1];
625 nj2 = aadata->jindex_gb[3*i+2];
627 mask = aadata->exclusion_mask_gb[i];
629 /* Prologue part, including exclusion mask */
630 for (j = nj0; j < nj1; j++, mask++)
632 if (*mask != 0)
634 k = j%natoms;
636 /* load j atom coordinates */
637 jx = x[3*k];
638 jy = x[3*k+1];
639 jz = x[3*k+2];
641 /* Calculate distance */
642 dx = ix - jx;
643 dy = iy - jy;
644 dz = iz - jz;
645 rsq = dx*dx+dy*dy+dz*dz;
647 /* Calculate 1/r and 1/r2 */
648 rinv = gmx::invsqrt(rsq);
649 dr = rsq*rinv;
651 /* sk is precalculated in init_gb() */
652 sk = born->param[k];
653 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
655 /* aj -> ai interaction */
658 if (rai < dr+sk)
660 lij = 1.0/(dr-sk);
661 dlij = 1.0;
663 if (rai > dr-sk)
665 lij = rai_inv;
666 dlij = 0.0;
669 uij = 1.0/(dr+sk);
670 lij2 = lij * lij;
671 lij3 = lij2 * lij;
672 uij2 = uij * uij;
673 uij3 = uij2 * uij;
675 diff2 = uij2-lij2;
677 lij_inv = gmx::invsqrt(lij2);
678 sk2 = sk*sk;
679 sk2_rinv = sk2*rinv;
680 prod = 0.25*sk2_rinv;
682 log_term = std::log(uij*lij_inv);
683 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
684 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
686 if (rai < sk-dr)
688 tmp = tmp + 2.0 * (rai_inv-lij);
691 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
692 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
694 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
696 dadxi = (dlij*t1+t2+t3)*rinv;
698 sum_ai += 0.5*tmp;
700 else
702 dadxi = 0.0;
705 /* ai -> aj interaction */
706 if (raj < dr + sk_ai)
708 lij = 1.0/(dr-sk_ai);
709 dlij = 1.0;
710 raj_inv = 1.0/raj;
712 if (raj > dr-sk_ai)
714 lij = raj_inv;
715 dlij = 0.0;
718 lij2 = lij * lij;
719 lij3 = lij2 * lij;
721 uij = 1.0/(dr+sk_ai);
722 uij2 = uij * uij;
723 uij3 = uij2 * uij;
725 diff2 = uij2-lij2;
727 lij_inv = gmx::invsqrt(lij2);
728 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
729 sk2_rinv = sk2*rinv;
730 prod = 0.25 * sk2_rinv;
732 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
733 log_term = std::log(uij*lij_inv);
735 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
737 if (raj < sk_ai-dr)
739 tmp = tmp + 2.0 * (raj_inv-lij);
742 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
743 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
744 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
746 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
748 born->gpol_hct_work[k] += 0.5*tmp;
750 else
752 dadxj = 0.0;
754 fr->dadx[n++] = dadxi;
755 fr->dadx[n++] = dadxj;
760 /* Main part, no exclusions */
761 for (j = nj1; j < nj2; j++)
763 k = j%natoms;
765 /* load j atom coordinates */
766 jx = x[3*k];
767 jy = x[3*k+1];
768 jz = x[3*k+2];
770 /* Calculate distance */
771 dx = ix - jx;
772 dy = iy - jy;
773 dz = iz - jz;
774 rsq = dx*dx+dy*dy+dz*dz;
776 /* Calculate 1/r and 1/r2 */
777 rinv = gmx::invsqrt(rsq);
778 dr = rsq*rinv;
780 /* sk is precalculated in init_gb() */
781 sk = born->param[k];
782 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
784 /* aj -> ai interaction */
785 if (rai < dr+sk)
787 lij = 1.0/(dr-sk);
788 dlij = 1.0;
790 if (rai > dr-sk)
792 lij = rai_inv;
793 dlij = 0.0;
796 uij = 1.0/(dr+sk);
797 lij2 = lij * lij;
798 lij3 = lij2 * lij;
799 uij2 = uij * uij;
800 uij3 = uij2 * uij;
802 diff2 = uij2-lij2;
804 lij_inv = gmx::invsqrt(lij2);
805 sk2 = sk*sk;
806 sk2_rinv = sk2*rinv;
807 prod = 0.25*sk2_rinv;
809 log_term = std::log(uij*lij_inv);
810 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
811 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
813 if (rai < sk-dr)
815 tmp = tmp + 2.0 * (rai_inv-lij);
818 /* duij = 1.0; */
819 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
820 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
821 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
823 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
825 sum_ai += 0.5*tmp;
827 else
829 dadxi = 0.0;
832 /* ai -> aj interaction */
833 if (raj < dr + sk_ai)
835 lij = 1.0/(dr-sk_ai);
836 dlij = 1.0;
837 raj_inv = 1.0/raj;
839 if (raj > dr-sk_ai)
841 lij = raj_inv;
842 dlij = 0.0;
845 lij2 = lij * lij;
846 lij3 = lij2 * lij;
848 uij = 1.0/(dr+sk_ai);
849 uij2 = uij * uij;
850 uij3 = uij2 * uij;
852 diff2 = uij2-lij2;
854 lij_inv = gmx::invsqrt(lij2);
855 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
856 sk2_rinv = sk2*rinv;
857 prod = 0.25 * sk2_rinv;
859 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
860 log_term = std::log(uij*lij_inv);
862 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
864 if (raj < sk_ai-dr)
866 tmp = tmp + 2.0 * (raj_inv-lij);
869 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
870 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
871 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
873 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
875 born->gpol_hct_work[k] += 0.5*tmp;
877 else
879 dadxj = 0.0;
881 fr->dadx[n++] = dadxi;
882 fr->dadx[n++] = dadxj;
884 born->gpol_hct_work[i] += sum_ai;
887 /* Parallel summations would go here if ever implemented with DD */
889 if (gb_algorithm == egbHCT)
891 /* HCT */
892 for (i = 0; i < natoms; i++)
894 if (born->use[i] != 0)
896 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
897 sum_ai = 1.0/rai - born->gpol_hct_work[i];
898 min_rad = rai + born->gb_doffset;
899 rad = 1.0/sum_ai;
901 born->bRad[i] = std::max(rad, min_rad);
902 fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
907 else
909 /* OBC */
910 /* Calculate the radii */
911 for (i = 0; i < natoms; i++)
913 if (born->use[i] != 0)
915 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
916 rai_inv2 = 1.0/rai;
917 rai = rai-doffset;
918 rai_inv = 1.0/rai;
919 sum_ai = rai * born->gpol_hct_work[i];
920 sum_ai2 = sum_ai * sum_ai;
921 sum_ai3 = sum_ai2 * sum_ai;
923 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
924 born->bRad[i] = rai_inv - tsum*rai_inv2;
925 born->bRad[i] = 1.0 / born->bRad[i];
927 fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
929 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
930 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
934 return 0;
942 genborn_allvsall_calc_chainrule(t_forcerec * fr,
943 t_mdatoms * mdatoms,
944 gmx_genborn_t * born,
945 real * x,
946 real * f,
947 int gb_algorithm,
948 void * work)
950 gmx_allvsallgb2_data_t *aadata;
951 int natoms;
952 int ni0, ni1;
953 int nj0, nj1, nj2;
954 int i, j, k, n;
955 int idx;
956 int * mask;
958 real ix, iy, iz;
959 real fix, fiy, fiz;
960 real jx, jy, jz;
961 real dx, dy, dz;
962 real tx, ty, tz;
963 real rbai, rbaj, fgb, fgb_ai, rbi;
964 real * rb;
965 real * dadx;
967 natoms = mdatoms->nr;
968 ni0 = 0;
969 ni1 = mdatoms->homenr;
970 dadx = fr->dadx;
972 aadata = (gmx_allvsallgb2_data_t *)work;
974 n = 0;
975 rb = born->work;
977 /* Loop to get the proper form for the Born radius term */
978 if (gb_algorithm == egbSTILL)
980 for (i = 0; i < natoms; i++)
982 rbi = born->bRad[i];
983 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
986 else if (gb_algorithm == egbHCT)
988 for (i = 0; i < natoms; i++)
990 rbi = born->bRad[i];
991 rb[i] = rbi * rbi * fr->dvda[i];
994 else if (gb_algorithm == egbOBC)
996 for (idx = 0; idx < natoms; idx++)
998 rbi = born->bRad[idx];
999 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1003 for (i = ni0; i < ni1; i++)
1005 /* We assume shifts are NOT used for all-vs-all interactions */
1007 /* Load i atom data */
1008 ix = x[3*i];
1009 iy = x[3*i+1];
1010 iz = x[3*i+2];
1012 fix = 0;
1013 fiy = 0;
1014 fiz = 0;
1016 rbai = rb[i];
1018 /* Load limits for loop over neighbors */
1019 nj0 = aadata->jindex_gb[3*i];
1020 nj1 = aadata->jindex_gb[3*i+1];
1021 nj2 = aadata->jindex_gb[3*i+2];
1023 mask = aadata->exclusion_mask_gb[i];
1025 /* Prologue part, including exclusion mask */
1026 for (j = nj0; j < nj1; j++, mask++)
1028 if (*mask != 0)
1030 k = j%natoms;
1032 /* load j atom coordinates */
1033 jx = x[3*k];
1034 jy = x[3*k+1];
1035 jz = x[3*k+2];
1037 /* Calculate distance */
1038 dx = ix - jx;
1039 dy = iy - jy;
1040 dz = iz - jz;
1042 rbaj = rb[k];
1044 fgb = rbai*dadx[n++];
1045 fgb_ai = rbaj*dadx[n++];
1047 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1048 fgb = fgb + fgb_ai;
1050 tx = fgb * dx;
1051 ty = fgb * dy;
1052 tz = fgb * dz;
1054 fix = fix + tx;
1055 fiy = fiy + ty;
1056 fiz = fiz + tz;
1058 /* Update force on atom aj */
1059 f[3*k] = f[3*k] - tx;
1060 f[3*k+1] = f[3*k+1] - ty;
1061 f[3*k+2] = f[3*k+2] - tz;
1065 /* Main part, no exclusions */
1066 for (j = nj1; j < nj2; j++)
1068 k = j%natoms;
1070 /* load j atom coordinates */
1071 jx = x[3*k];
1072 jy = x[3*k+1];
1073 jz = x[3*k+2];
1075 /* Calculate distance */
1076 dx = ix - jx;
1077 dy = iy - jy;
1078 dz = iz - jz;
1080 rbaj = rb[k];
1082 fgb = rbai*dadx[n++];
1083 fgb_ai = rbaj*dadx[n++];
1085 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1086 fgb = fgb + fgb_ai;
1088 tx = fgb * dx;
1089 ty = fgb * dy;
1090 tz = fgb * dz;
1092 fix = fix + tx;
1093 fiy = fiy + ty;
1094 fiz = fiz + tz;
1096 /* Update force on atom aj */
1097 f[3*k] = f[3*k] - tx;
1098 f[3*k+1] = f[3*k+1] - ty;
1099 f[3*k+2] = f[3*k+2] - tz;
1101 /* Update force and shift forces on atom ai */
1102 f[3*i] = f[3*i] + fix;
1103 f[3*i+1] = f[3*i+1] + fiy;
1104 f[3*i+2] = f[3*i+2] + fiz;
1107 return 0;