Replace our fftpack version with Numpy's version
[gromacs.git] / src / mdlib / genborn_allvsall.c
blob36111bef22132bf74ac91a79085b45a803eecc1c
1 /*
2 * This source code is part of
4 * G R O M A C S
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2009, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
16 * later version.
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
30 #ifdef HAVE_CONFIG_H
31 #include <config.h>
32 #endif
34 #include <math.h>
35 #include "types/simple.h"
37 #include "vec.h"
38 #include "smalloc.h"
40 #include "partdec.h"
41 #include "network.h"
42 #include "physics.h"
43 #include "genborn.h"
44 #include "genborn_allvsall.h"
47 typedef struct
49 int * jindex_gb;
50 int ** exclusion_mask_gb;
52 gmx_allvsallgb2_data_t;
54 static int
55 calc_maxoffset(int i,int natoms)
57 int maxoffset;
59 if ((natoms % 2) == 1)
61 /* Odd number of atoms, easy */
62 maxoffset = natoms/2;
64 else if ((natoms % 4) == 0)
66 /* Multiple of four is hard */
67 if (i < natoms/2)
69 if ((i % 2) == 0)
71 maxoffset=natoms/2;
73 else
75 maxoffset=natoms/2-1;
78 else
80 if ((i % 2) == 1)
82 maxoffset=natoms/2;
84 else
86 maxoffset=natoms/2-1;
90 else
92 /* natoms/2 = odd */
93 if ((i % 2) == 0)
95 maxoffset=natoms/2;
97 else
99 maxoffset=natoms/2-1;
103 return maxoffset;
106 static void
107 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
108 t_ilist * ilist,
109 int natoms,
110 gmx_bool bInclude12,
111 gmx_bool bInclude13,
112 gmx_bool bInclude14)
114 int i,j,k,tp;
115 int a1,a2;
116 int nj0,nj1;
117 int max_offset;
118 int max_excl_offset;
119 int nj;
121 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
122 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
123 * whether they should interact or not.
125 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
126 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
127 * we create a jindex array with three elements per i atom: the starting point, the point to
128 * which we need to check exclusions, and the end point.
129 * This way we only have to allocate a short exclusion mask per i atom.
132 /* Allocate memory for jindex arrays */
133 snew(aadata->jindex_gb,3*natoms);
135 /* Pointer to lists with exclusion masks */
136 snew(aadata->exclusion_mask_gb,natoms);
138 for(i=0;i<natoms;i++)
140 /* Start */
141 aadata->jindex_gb[3*i] = i+1;
142 max_offset = calc_maxoffset(i,natoms);
144 /* first check the max range of atoms to EXCLUDE */
145 max_excl_offset = 0;
146 if(!bInclude12)
148 for(j=0;j<ilist[F_GB12].nr;j+=3)
150 a1 = ilist[F_GB12].iatoms[j+1];
151 a2 = ilist[F_GB12].iatoms[j+2];
153 if(a1==i)
155 k = a2-a1;
157 else if(a2==i)
159 k = a1+natoms-a2;
161 else
163 continue;
165 if(k>0 && k<=max_offset)
167 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
171 if(!bInclude13)
173 for(j=0;j<ilist[F_GB13].nr;j+=3)
175 a1 = ilist[F_GB13].iatoms[j+1];
176 a2 = ilist[F_GB13].iatoms[j+2];
179 if(a1==i)
181 k = a2-a1;
183 else if(a2==i)
185 k = a1+natoms-a2;
187 else
189 continue;
191 if(k>0 && k<=max_offset)
193 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
197 if(!bInclude14)
199 for(j=0;j<ilist[F_GB14].nr;j+=3)
201 a1 = ilist[F_GB14].iatoms[j+1];
202 a2 = ilist[F_GB14].iatoms[j+2];
205 if(a1==i)
207 k = a2-a1;
209 else if(a2==i)
211 k = a1+natoms-a2;
213 else
215 continue;
217 if(k>0 && k<=max_offset)
219 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
223 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
225 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
227 snew(aadata->exclusion_mask_gb[i],max_excl_offset);
229 /* Include everything by default */
230 for(j=0;j<max_excl_offset;j++)
232 /* Use all-ones to mark interactions that should be present, compatible with SSE */
233 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
235 /* Go through exclusions again */
236 if(!bInclude12)
238 for(j=0;j<ilist[F_GB12].nr;j+=3)
240 a1 = ilist[F_GB12].iatoms[j+1];
241 a2 = ilist[F_GB12].iatoms[j+2];
243 if(a1==i)
245 k = a2-a1;
247 else if(a2==i)
249 k = a1+natoms-a2;
251 else
253 continue;
255 if(k>0 && k<=max_offset)
257 aadata->exclusion_mask_gb[i][k-1] = 0;
261 if(!bInclude13)
263 for(j=0;j<ilist[F_GB13].nr;j+=3)
265 a1 = ilist[F_GB13].iatoms[j+1];
266 a2 = ilist[F_GB13].iatoms[j+2];
268 if(a1==i)
270 k = a2-a1;
272 else if(a2==i)
274 k = a1+natoms-a2;
276 else
278 continue;
280 if(k>0 && k<=max_offset)
282 aadata->exclusion_mask_gb[i][k-1] = 0;
286 if(!bInclude14)
288 for(j=0;j<ilist[F_GB14].nr;j+=3)
290 a1 = ilist[F_GB14].iatoms[j+1];
291 a2 = ilist[F_GB14].iatoms[j+2];
293 if(a1==i)
295 k = a2-a1;
297 else if(a2==i)
299 k = a1+natoms-a2;
301 else
303 continue;
305 if(k>0 && k<=max_offset)
307 aadata->exclusion_mask_gb[i][k-1] = 0;
312 /* End */
314 /* End */
315 aadata->jindex_gb[3*i+2] = i+1+max_offset;
320 static void
321 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
322 t_ilist * ilist,
323 int natoms,
324 gmx_bool bInclude12,
325 gmx_bool bInclude13,
326 gmx_bool bInclude14)
328 int i,j,idx;
329 gmx_allvsallgb2_data_t *aadata;
330 real *p;
332 snew(aadata,1);
333 *p_aadata = aadata;
335 setup_gb_exclusions_and_indices(aadata,ilist,natoms,bInclude12,bInclude13,bInclude14);
341 genborn_allvsall_calc_still_radii(t_forcerec * fr,
342 t_mdatoms * mdatoms,
343 gmx_genborn_t * born,
344 gmx_localtop_t * top,
345 real * x,
346 t_commrec * cr,
347 void * work)
349 gmx_allvsallgb2_data_t *aadata;
350 int natoms;
351 int ni0,ni1;
352 int nj0,nj1,nj2;
353 int i,j,k,n;
354 int * mask;
356 real ix,iy,iz;
357 real jx,jy,jz;
358 real dx,dy,dz;
359 real rsq,rinv;
360 real gpi,rai,vai;
361 real prod_ai;
362 real irsq,idr4,idr6;
363 real raj,rvdw,ratio;
364 real vaj,ccf,dccf,theta,cosq;
365 real term,prod,icf4,icf6,gpi2,factor,sinq;
367 natoms = mdatoms->nr;
368 ni0 = mdatoms->start;
369 ni1 = mdatoms->start+mdatoms->homenr;
370 factor = 0.5*ONE_4PI_EPS0;
371 n = 0;
373 aadata = *((gmx_allvsallgb2_data_t **)work);
375 if(aadata==NULL)
377 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
378 FALSE,FALSE,TRUE);
379 *((gmx_allvsallgb2_data_t **)work) = aadata;
383 for(i=0;i<born->nr;i++)
385 born->gpol_still_work[i] = 0;
389 for(i=ni0; i<ni1; i++)
391 /* We assume shifts are NOT used for all-vs-all interactions */
393 /* Load i atom data */
394 ix = x[3*i];
395 iy = x[3*i+1];
396 iz = x[3*i+2];
398 gpi = 0.0;
400 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
401 vai = born->vsolv[i];
402 prod_ai = STILL_P4*vai;
404 /* Load limits for loop over neighbors */
405 nj0 = aadata->jindex_gb[3*i];
406 nj1 = aadata->jindex_gb[3*i+1];
407 nj2 = aadata->jindex_gb[3*i+2];
409 mask = aadata->exclusion_mask_gb[i];
411 /* Prologue part, including exclusion mask */
412 for(j=nj0; j<nj1; j++,mask++)
414 if(*mask!=0)
416 k = j%natoms;
418 /* load j atom coordinates */
419 jx = x[3*k];
420 jy = x[3*k+1];
421 jz = x[3*k+2];
423 /* Calculate distance */
424 dx = ix - jx;
425 dy = iy - jy;
426 dz = iz - jz;
427 rsq = dx*dx+dy*dy+dz*dz;
429 /* Calculate 1/r and 1/r2 */
430 rinv = gmx_invsqrt(rsq);
431 irsq = rinv*rinv;
432 idr4 = irsq*irsq;
433 idr6 = idr4*irsq;
435 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
437 rvdw = rai + raj;
439 ratio = rsq / (rvdw * rvdw);
440 vaj = born->vsolv[k];
443 if(ratio>STILL_P5INV)
445 ccf=1.0;
446 dccf=0.0;
448 else
450 theta = ratio*STILL_PIP5;
451 cosq = cos(theta);
452 term = 0.5*(1.0-cosq);
453 ccf = term*term;
454 sinq = 1.0 - cosq*cosq;
455 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
458 prod = STILL_P4*vaj;
459 icf4 = ccf*idr4;
460 icf6 = (4*ccf-dccf)*idr6;
462 born->gpol_still_work[k] += prod_ai*icf4;
463 gpi = gpi+prod*icf4;
465 /* Save ai->aj and aj->ai chain rule terms */
466 fr->dadx[n++] = prod*icf6;
467 fr->dadx[n++] = prod_ai*icf6;
469 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
474 /* Main part, no exclusions */
475 for(j=nj1; j<nj2; j++)
477 k = j%natoms;
479 /* load j atom coordinates */
480 jx = x[3*k];
481 jy = x[3*k+1];
482 jz = x[3*k+2];
484 /* Calculate distance */
485 dx = ix - jx;
486 dy = iy - jy;
487 dz = iz - jz;
488 rsq = dx*dx+dy*dy+dz*dz;
490 /* Calculate 1/r and 1/r2 */
491 rinv = gmx_invsqrt(rsq);
492 irsq = rinv*rinv;
493 idr4 = irsq*irsq;
494 idr6 = idr4*irsq;
496 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
498 rvdw = rai + raj;
500 ratio = rsq / (rvdw * rvdw);
501 vaj = born->vsolv[k];
503 if(ratio>STILL_P5INV)
505 ccf=1.0;
506 dccf=0.0;
508 else
510 theta = ratio*STILL_PIP5;
511 cosq = cos(theta);
512 term = 0.5*(1.0-cosq);
513 ccf = term*term;
514 sinq = 1.0 - cosq*cosq;
515 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
518 prod = STILL_P4*vaj;
519 icf4 = ccf*idr4;
520 icf6 = (4*ccf-dccf)*idr6;
522 born->gpol_still_work[k] += prod_ai*icf4;
523 gpi = gpi+prod*icf4;
525 /* Save ai->aj and aj->ai chain rule terms */
526 fr->dadx[n++] = prod*icf6;
527 fr->dadx[n++] = prod_ai*icf6;
529 born->gpol_still_work[i]+=gpi;
532 /* Parallel summations */
533 if(PARTDECOMP(cr))
535 gmx_sum(natoms, born->gpol_still_work, cr);
538 /* Calculate the radii */
539 for(i=0;i<natoms;i++)
541 if(born->use[i] != 0)
543 gpi = born->gpol[i]+born->gpol_still_work[i];
544 gpi2 = gpi * gpi;
545 born->bRad[i] = factor*gmx_invsqrt(gpi2);
546 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
550 return 0;
556 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
557 t_mdatoms * mdatoms,
558 gmx_genborn_t * born,
559 int gb_algorithm,
560 gmx_localtop_t * top,
561 real * x,
562 t_commrec * cr,
563 void * work)
565 gmx_allvsallgb2_data_t *aadata;
566 int natoms;
567 int ni0,ni1;
568 int nj0,nj1,nj2;
569 int i,j,k,n;
570 int * mask;
572 real ix,iy,iz;
573 real jx,jy,jz;
574 real dx,dy,dz;
575 real rsq,rinv;
576 real prod,raj;
577 real rai,doffset,rai_inv,rai_inv2,sk_ai,sk2_ai,sum_ai;
578 real dr,sk,lij,dlij,lij2,lij3,uij2,uij3,diff2,uij,log_term;
579 real lij_inv,sk2,sk2_rinv,tmp,t1,t2,t3,raj_inv,sum_ai2,sum_ai3,tsum;
580 real tchain;
581 real dadxi,dadxj;
582 real rad,min_rad;
584 natoms = mdatoms->nr;
585 ni0 = mdatoms->start;
586 ni1 = mdatoms->start+mdatoms->homenr;
588 n = 0;
589 prod = 0;
590 raj = 0;
591 doffset = born->gb_doffset;
593 aadata = *((gmx_allvsallgb2_data_t **)work);
595 if(aadata==NULL)
597 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
598 TRUE,TRUE,TRUE);
599 *((gmx_allvsallgb2_data_t **)work) = aadata;
602 for(i=0;i<born->nr;i++)
604 born->gpol_hct_work[i] = 0;
607 for(i=ni0; i<ni1; i++)
609 /* We assume shifts are NOT used for all-vs-all interactions */
611 /* Load i atom data */
612 ix = x[3*i];
613 iy = x[3*i+1];
614 iz = x[3*i+2];
616 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
617 rai_inv = 1.0/rai;
619 sk_ai = born->param[i];
620 sk2_ai = sk_ai*sk_ai;
622 sum_ai = 0;
624 /* Load limits for loop over neighbors */
625 nj0 = aadata->jindex_gb[3*i];
626 nj1 = aadata->jindex_gb[3*i+1];
627 nj2 = aadata->jindex_gb[3*i+2];
629 mask = aadata->exclusion_mask_gb[i];
631 /* Prologue part, including exclusion mask */
632 for(j=nj0; j<nj1; j++,mask++)
634 if(*mask!=0)
636 k = j%natoms;
638 /* load j atom coordinates */
639 jx = x[3*k];
640 jy = x[3*k+1];
641 jz = x[3*k+2];
643 /* Calculate distance */
644 dx = ix - jx;
645 dy = iy - jy;
646 dz = iz - jz;
647 rsq = dx*dx+dy*dy+dz*dz;
649 /* Calculate 1/r and 1/r2 */
650 rinv = gmx_invsqrt(rsq);
651 dr = rsq*rinv;
653 /* sk is precalculated in init_gb() */
654 sk = born->param[k];
655 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
657 /* aj -> ai interaction */
660 if(rai < dr+sk)
662 lij = 1.0/(dr-sk);
663 dlij = 1.0;
665 if(rai>dr-sk)
667 lij = rai_inv;
668 dlij = 0.0;
671 uij = 1.0/(dr+sk);
672 lij2 = lij * lij;
673 lij3 = lij2 * lij;
674 uij2 = uij * uij;
675 uij3 = uij2 * uij;
677 diff2 = uij2-lij2;
679 lij_inv = gmx_invsqrt(lij2);
680 sk2 = sk*sk;
681 sk2_rinv = sk2*rinv;
682 prod = 0.25*sk2_rinv;
684 log_term = log(uij*lij_inv);
685 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
686 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
688 if(rai < sk-dr)
690 tmp = tmp + 2.0 * (rai_inv-lij);
693 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
694 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
696 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
698 dadxi = (dlij*t1+t2+t3)*rinv;
700 sum_ai += 0.5*tmp;
702 else
704 dadxi = 0.0;
707 /* ai -> aj interaction */
708 if(raj < dr + sk_ai)
710 lij = 1.0/(dr-sk_ai);
711 dlij = 1.0;
712 raj_inv = 1.0/raj;
714 if(raj>dr-sk_ai)
716 lij = raj_inv;
717 dlij = 0.0;
720 lij2 = lij * lij;
721 lij3 = lij2 * lij;
723 uij = 1.0/(dr+sk_ai);
724 uij2 = uij * uij;
725 uij3 = uij2 * uij;
727 diff2 = uij2-lij2;
729 lij_inv = gmx_invsqrt(lij2);
730 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
731 sk2_rinv = sk2*rinv;
732 prod = 0.25 * sk2_rinv;
734 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
735 log_term = log(uij*lij_inv);
737 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
739 if(raj<sk_ai-dr)
741 tmp = tmp + 2.0 * (raj_inv-lij);
744 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
745 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
746 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
748 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
750 born->gpol_hct_work[k] += 0.5*tmp;
752 else
754 dadxj = 0.0;
756 fr->dadx[n++] = dadxi;
757 fr->dadx[n++] = dadxj;
762 /* Main part, no exclusions */
763 for(j=nj1; j<nj2; j++)
765 k = j%natoms;
767 /* load j atom coordinates */
768 jx = x[3*k];
769 jy = x[3*k+1];
770 jz = x[3*k+2];
772 /* Calculate distance */
773 dx = ix - jx;
774 dy = iy - jy;
775 dz = iz - jz;
776 rsq = dx*dx+dy*dy+dz*dz;
778 /* Calculate 1/r and 1/r2 */
779 rinv = gmx_invsqrt(rsq);
780 dr = rsq*rinv;
782 /* sk is precalculated in init_gb() */
783 sk = born->param[k];
784 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
786 /* aj -> ai interaction */
787 if(rai < dr+sk)
789 lij = 1.0/(dr-sk);
790 dlij = 1.0;
792 if(rai>dr-sk)
794 lij = rai_inv;
795 dlij = 0.0;
798 uij = 1.0/(dr+sk);
799 lij2 = lij * lij;
800 lij3 = lij2 * lij;
801 uij2 = uij * uij;
802 uij3 = uij2 * uij;
804 diff2 = uij2-lij2;
806 lij_inv = gmx_invsqrt(lij2);
807 sk2 = sk*sk;
808 sk2_rinv = sk2*rinv;
809 prod = 0.25*sk2_rinv;
811 log_term = log(uij*lij_inv);
812 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
813 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
815 if(rai < sk-dr)
817 tmp = tmp + 2.0 * (rai_inv-lij);
820 /* duij = 1.0; */
821 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
822 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
823 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
825 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
827 sum_ai += 0.5*tmp;
829 else
831 dadxi = 0.0;
834 /* ai -> aj interaction */
835 if(raj < dr + sk_ai)
837 lij = 1.0/(dr-sk_ai);
838 dlij = 1.0;
839 raj_inv = 1.0/raj;
841 if(raj>dr-sk_ai)
843 lij = raj_inv;
844 dlij = 0.0;
847 lij2 = lij * lij;
848 lij3 = lij2 * lij;
850 uij = 1.0/(dr+sk_ai);
851 uij2 = uij * uij;
852 uij3 = uij2 * uij;
854 diff2 = uij2-lij2;
856 lij_inv = gmx_invsqrt(lij2);
857 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
858 sk2_rinv = sk2*rinv;
859 prod = 0.25 * sk2_rinv;
861 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
862 log_term = log(uij*lij_inv);
864 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
866 if(raj<sk_ai-dr)
868 tmp = tmp + 2.0 * (raj_inv-lij);
871 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
872 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
873 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
875 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
877 born->gpol_hct_work[k] += 0.5*tmp;
879 else
881 dadxj = 0.0;
883 fr->dadx[n++] = dadxi;
884 fr->dadx[n++] = dadxj;
886 born->gpol_hct_work[i]+=sum_ai;
889 /* Parallel summations */
890 if(PARTDECOMP(cr))
892 gmx_sum(natoms, born->gpol_hct_work, cr);
895 if(gb_algorithm==egbHCT)
897 /* HCT */
898 for(i=0;i<natoms;i++)
900 if(born->use[i] != 0)
902 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
903 sum_ai = 1.0/rai - born->gpol_hct_work[i];
904 min_rad = rai + born->gb_doffset;
905 rad = 1.0/sum_ai;
907 born->bRad[i] = rad > min_rad ? rad : min_rad;
908 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
913 else
915 /* OBC */
916 /* Calculate the radii */
917 for(i=0;i<natoms;i++)
919 if(born->use[i] != 0)
921 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
922 rai_inv2 = 1.0/rai;
923 rai = rai-doffset;
924 rai_inv = 1.0/rai;
925 sum_ai = rai * born->gpol_hct_work[i];
926 sum_ai2 = sum_ai * sum_ai;
927 sum_ai3 = sum_ai2 * sum_ai;
929 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
930 born->bRad[i] = rai_inv - tsum*rai_inv2;
931 born->bRad[i] = 1.0 / born->bRad[i];
933 fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
935 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
936 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
940 return 0;
948 genborn_allvsall_calc_chainrule(t_forcerec * fr,
949 t_mdatoms * mdatoms,
950 gmx_genborn_t * born,
951 real * x,
952 real * f,
953 int gb_algorithm,
954 void * work)
956 gmx_allvsallgb2_data_t *aadata;
957 int natoms;
958 int ni0,ni1;
959 int nj0,nj1,nj2;
960 int i,j,k,n;
961 int idx;
962 int * mask;
964 real ix,iy,iz;
965 real fix,fiy,fiz;
966 real jx,jy,jz;
967 real dx,dy,dz;
968 real tx,ty,tz;
969 real rbai,rbaj,fgb,fgb_ai,rbi;
970 real * rb;
971 real * dadx;
973 natoms = mdatoms->nr;
974 ni0 = mdatoms->start;
975 ni1 = mdatoms->start+mdatoms->homenr;
976 dadx = fr->dadx;
978 aadata = (gmx_allvsallgb2_data_t *)work;
980 n = 0;
981 rb = born->work;
983 /* Loop to get the proper form for the Born radius term */
984 if(gb_algorithm==egbSTILL)
986 for(i=0;i<natoms;i++)
988 rbi = born->bRad[i];
989 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
992 else if(gb_algorithm==egbHCT)
994 for(i=0;i<natoms;i++)
996 rbi = born->bRad[i];
997 rb[i] = rbi * rbi * fr->dvda[i];
1000 else if(gb_algorithm==egbOBC)
1002 for(idx=0;idx<natoms;idx++)
1004 rbi = born->bRad[idx];
1005 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1009 for(i=ni0; i<ni1; i++)
1011 /* We assume shifts are NOT used for all-vs-all interactions */
1013 /* Load i atom data */
1014 ix = x[3*i];
1015 iy = x[3*i+1];
1016 iz = x[3*i+2];
1018 fix = 0;
1019 fiy = 0;
1020 fiz = 0;
1022 rbai = rb[i];
1024 /* Load limits for loop over neighbors */
1025 nj0 = aadata->jindex_gb[3*i];
1026 nj1 = aadata->jindex_gb[3*i+1];
1027 nj2 = aadata->jindex_gb[3*i+2];
1029 mask = aadata->exclusion_mask_gb[i];
1031 /* Prologue part, including exclusion mask */
1032 for(j=nj0; j<nj1; j++,mask++)
1034 if(*mask!=0)
1036 k = j%natoms;
1038 /* load j atom coordinates */
1039 jx = x[3*k];
1040 jy = x[3*k+1];
1041 jz = x[3*k+2];
1043 /* Calculate distance */
1044 dx = ix - jx;
1045 dy = iy - jy;
1046 dz = iz - jz;
1048 rbaj = rb[k];
1050 fgb = rbai*dadx[n++];
1051 fgb_ai = rbaj*dadx[n++];
1053 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1054 fgb = fgb + fgb_ai;
1056 tx = fgb * dx;
1057 ty = fgb * dy;
1058 tz = fgb * dz;
1060 fix = fix + tx;
1061 fiy = fiy + ty;
1062 fiz = fiz + tz;
1064 /* Update force on atom aj */
1065 f[3*k] = f[3*k] - tx;
1066 f[3*k+1] = f[3*k+1] - ty;
1067 f[3*k+2] = f[3*k+2] - tz;
1071 /* Main part, no exclusions */
1072 for(j=nj1; j<nj2; j++)
1074 k = j%natoms;
1076 /* load j atom coordinates */
1077 jx = x[3*k];
1078 jy = x[3*k+1];
1079 jz = x[3*k+2];
1081 /* Calculate distance */
1082 dx = ix - jx;
1083 dy = iy - jy;
1084 dz = iz - jz;
1086 rbaj = rb[k];
1088 fgb = rbai*dadx[n++];
1089 fgb_ai = rbaj*dadx[n++];
1091 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1092 fgb = fgb + fgb_ai;
1094 tx = fgb * dx;
1095 ty = fgb * dy;
1096 tz = fgb * dz;
1098 fix = fix + tx;
1099 fiy = fiy + ty;
1100 fiz = fiz + tz;
1102 /* Update force on atom aj */
1103 f[3*k] = f[3*k] - tx;
1104 f[3*k+1] = f[3*k+1] - ty;
1105 f[3*k+2] = f[3*k+2] - tz;
1107 /* Update force and shift forces on atom ai */
1108 f[3*i] = f[3*i] + fix;
1109 f[3*i+1] = f[3*i+1] + fiy;
1110 f[3*i+2] = f[3*i+2] + fiz;
1113 return 0;