src/gmxlib/mkinl_interactions.c

   1 /*
   2  * $Id$
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
  36 /* This file is NOT threadsafe, but it is only used to create
  37  * the innerloops during the build process, so it will never be
  38  * executed by multiple threads.
  39  */
  40
  41 #include "mkinl.h"
  42 #include <string.h>
  43
  44 char forcebuf[255];
  45 char tabforcebuf[255];
  46
  47
  48 int calc_scalar_force(int i,int j)
  49 {
  50   int ij = 10*i+j;
  51   int nflop = 0;
  52
  53   /* calculate scalar force */
  54   if(!DO_SOFTCORE) {
  55     /* For softcore loops fs is calculated directly in the routine,
  56      * due to the special construction with rinva and rivb. We also
  57      * fix the extra flop added for each buffer. */
  58     comment("Calculate the scalar force");
  59     if(strlen(tabforcebuf) && strlen(forcebuf)) {
  60       /* both table and non-table interactions */
  61       assign("fs%d","((%s)*rinv%d-(%s))*rinv%d",ij,forcebuf,ij,tabforcebuf,ij);
  62       nflop ++; /* 3-2 flops */
  63     }
  64     else if(strlen(tabforcebuf))
  65       /* only table interactions */
  66       assign("fs%d","-(%s)*rinv%d",ij,tabforcebuf,ij); /* 1-1 flops */
  67     else if(strlen(forcebuf))
  68       /* only non-table interactions */
  69       assign("fs%d","(%s)*rinvsq%d",ij,forcebuf,ij); /* 1-1 flops */
  70   }
  71   return nflop;
  72 }
  73
  74
  75 int update_inner_forces(int i,int j)
  76 {
  77   int ij = 10*i+j;
  78   int nflop = 0;
  79   int m,jidxnr,offset;
  80   char src[25],dest[25];
  81
  82
  83   comment("Convert scalar force to cartesian coords");
  84   if(DO_VECTORIZE) {
  85     assign("tx%d","%s*fs%d",ij,ARRAY(drbuf,m3),ij);
  86     increment("m3","1");
  87     assign("ty%d","%s*fs%d",ij,ARRAY(drbuf,m3),ij);
  88     increment("m3","1");
  89     assign("tz%d","%s*fs%d",ij,ARRAY(drbuf,m3),ij);
  90     increment("m3","1");
  91   } else {
  92     assign("tx%d","dx%d*fs%d",ij,ij,ij);
  93     assign("ty%d","dy%d*fs%d",ij,ij,ij);
  94     assign("tz%d","dz%d*fs%d",ij,ij,ij);
  95   }
  96
  97   /* i forces */
  98   increment("fix%d","tx%d",i,ij);
  99   increment("fiy%d","ty%d",i,ij);
 100   increment("fiz%d","tz%d",i,ij);
 101   nflop += 6;
 102
 103   /* and finally, j forces */
 104   for(m=0;m<DIM;m++) {
 105
 106     offset = 3*(j-1)+m;
 107
 108     /* determine source array or variable */
 109     if(!DO_PREFETCH && i==1) /* source is faction or fbuf */
 110       sprintf(src, arch.vectorcpu ? _array("fbuf","kk+%d",offset) : _array("faction","j3+%d",offset));
 111     else  /* source is fj */
 112       sprintf(src,"fj%c%d",m+'x',j);
 113
 114     /* determine destination array or variable */
 115     if(i==loop.ni) /* dest is faction or fbuf */
 116       sprintf(dest, _array("faction", "j3+%d",offset));
 117     else  /* dest is fj */
 118       sprintf(dest,"fj%c%d",m+'x',j);
 119
 120     assign(dest,"%s-t%c%d%d",src,m+'x',i,j); /* dest=src-tx */
 121     nflop++;
 122   }
 123   return nflop;
 124 }
 125
 126
 127 int table_index(char *rin)
 128 {
 129     assign("rt",rin);
 130 #if (defined __GNUC__ && (defined i386 || defined __386__) && !defined DOUBLE && defined USE_X86TRUNC)
 131     if(bC)
 132       code("x86trunc(rt,n0);");
 133     else
 134 #endif
 135       assign("n0","rt"); /* also used for g77 */
 136
 137     assign("eps","rt-n0");
 138     assign("eps2","eps*eps");
 139     return 2;
 140 }
 141
 142
 143 int extract_table(char *pos)
 144 {
 145     char buf[25];
 146     int nflop=0;
 147
 148     assign("nnn",pos);
 149     assign("Y",ARRAY(VFtab,nnn));
 150     assign("F",ARRAY(VFtab,nnn+1));
 151     sprintf(buf,"eps*%s",ARRAY(VFtab,nnn+2));
 152     assign("Geps",buf);
 153     sprintf(buf,"eps2*%s",ARRAY(VFtab,nnn+3));
 154     assign("Heps2",buf);
 155     assign("Fp","F+Geps+Heps2");
 156     assign("VV","Y+eps*Fp");
 157
 158     nflop = 6;
 159
 160     if(DO_FORCE || DO_SOFTCORE) {
 161       assign("FF","Fp+Geps+two*Heps2");
 162       nflop += 3;
 163     }
 164     return nflop;
 165 }
 166
 167
 168
 169 static int assign_chargeproduct(int i,int j,char *qq)
 170 {
 171   /* Assign and return a string with the name of
 172    * the variable containing the i charge * j charge. */
 173   int nflop = 0;
 174
 175   if(loop.free==FREE_LAMBDA) {
 176     assign("qqA","iqA*%s",ARRAY(charge,jnr));
 177     assign("qqB","iqB*%s",ARRAY(chargeB,jnr));
 178     sprintf(qq,"qqq");
 179     nflop+=2;
 180   } else if(loop.free==FREE_SOFTCORE) {
 181     sprintf(qq,"iq*jq");
 182     nflop++;
 183   } else {
 184     switch(loop.sol) {
 185     case SOL_WATER:
 186       assign("qq","%s*%s", arch.simplewater ? "iqA" : ((i==1) ? "qO" : "qH"),ARRAY(charge,jnr));
 187       nflop += 1;
 188       sprintf(qq,"qq");
 189       break;
 190     case SOL_WATERWATER:
 191       if(arch.simplewater) {
 192           sprintf(qq,"iqA*%s", (j==1) ? "qO" : "qH");
 193       } else {
 194       if(i==1 && j==1)
 195         sprintf(qq,"qqOO");
 196       else if(i==1 || j==1)
 197         sprintf(qq,"qqOH");
 198       else
 199         sprintf(qq,"qqHH");
 200       }
 201       break;
 202     default:
 203       assign("qq","iqA*%s",ARRAY(charge,jnr));
 204       nflop += 1;
 205       sprintf(qq,"qq");
 206     }
 207   }
 208   return nflop;
 209 }
 210
 211
 212 int do_softcore(int i, int j)
 213 {
 214     int nflop = 0;
 215     char fabuf[50],fbbuf[50],dvdlbuf[100];
 216     char ifbuf[100];
 217     int ij=10*i+j;
 218
 219     fabuf[0]=fbbuf[0]=dvdlbuf[0]=0;
 220
 221     comment("Tabulated Softcore free energy interactions");
 222
 223     assign("tjA","ntiA+%d*%s%s", N_VDWPARAM, ARRAY(type,jnr) , bC ? "" : "+1");
 224     assign("tjB","ntiB+%d*%s%s", N_VDWPARAM, ARRAY(typeB,jnr) , bC ? "" : "+1");
 225     assign("c6a",ARRAY(nbfp,tjA));
 226     assign("c6b",ARRAY(nbfp,tjB));
 227     assign( DO_BHAM ? "cexp1a" : "c12a",ARRAY(nbfp,tjA+1));
 228     assign( DO_BHAM ? "cexp1b" : "c12b",ARRAY(nbfp,tjB+1));
 229     if(DO_BHAM) {
 230       assign("cexp2a",ARRAY(nbfp,tjA+2));
 231       assign("cexp2b",ARRAY(nbfp,tjB+2));
 232       assign("sigma6a","defsigma6");
 233       assign("sigma6b","defsigma6");
 234     }
 235
 236     if(!DO_BHAM) {
 237       start_if( bC ? "(c6a > 0) && (c12a > 0)" :
 238                 "(c6a.gt.0).and.(c12a.gt.0)");
 239       /* check if we can compute sigma_a from c6/c12 */
 240       assign("sigma6a","c12a/c6a");
 241       do_else(); /* use default value for sigma_a */
 242     }
 243     assign("sigma6a","defsigma6");
 244     if(!DO_BHAM) {
 245       end_if(); /* end of check for sigma_a */
 246
 247       start_if( bC ? "(c6b > 0) && (c12b > 0)" :
 248                 "(c6b.gt.0).and.(c12b.gt.0)");
 249       /* corresponding check for sigma_b */
 250       assign("sigma6b","c12b/c6b");
 251       do_else();
 252     }
 253     assign("sigma6b","defsigma6");
 254     if(!DO_BHAM)
 255       end_if(); /* end of sigma_b check */
 256
 257     assign("rfour","rsq%d*rsq%d",ij,ij);
 258     assign("rsix","rfour*rsq%d",ij);
 259
 260     ifbuf[0]=0;
 261
 262     if(DO_COULTAB) {
 263       assign("qqA","iqA*%s",ARRAY(charge,jnr));
 264       sprintf(ifbuf, bC ? "(qqA != 0)" : "(qqA.ne.0)");
 265     }
 266     if(DO_VDWTAB) {
 267       if(strlen(ifbuf)>0)
 268         strcat(ifbuf, bC ? " || " : ".or.");
 269       strcat(ifbuf, bC ? "(c6a > 0) || " : "(c6a.gt.0).or.");
 270       if(DO_BHAM)
 271         strcat(ifbuf, bC ? "(cexp1a > 0)" : "(cexp1a.gt.0)");
 272       else
 273         strcat(ifbuf, bC ? "(c12a > 0)" : "(c12a.gt.0)");
 274     }
 275
 276     start_if(ifbuf);
 277     /* do state A */
 278     assign("rA", bC ? "pow(Alpha*sigma6a*lam2+rsix,onesixth)" :
 279            "cpow(Alpha*sigma6a*lam2+rsix,onesixth)");
 280     assign("rinva","1.0/rA");
 281     assign("rinv5a","rinva*rinva");
 282     assign("rinv5a","rinv5a*rinv5a*rinva");
 283
 284     /* Do the table lookup on r_A */
 285     comment("Lookup on rA");
 286     nflop += 22;
 287     nflop += table_index("rA*tabscale");
 288     assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 289
 290     if(DO_COULTAB) {
 291       comment("Coulomb table");
 292       nflop += extract_table("n1");
 293       assign("VVCa","qqA*VV");
 294       assign("FFCa","qqA*tabscale*FF");
 295       nflop += 4;
 296     }
 297     if(DO_VDWTAB) {
 298       comment("Dispersion");
 299       nflop += extract_table( DO_COULTAB ? "n1+4" :"n1");
 300       assign("VVDa","c6a*VV");
 301       assign("FFDa","c6a*tabscale*FF");
 302       nflop += 3;
 303       if(DO_BHAM) {
 304         comment("Buckingham repulsion");
 305         /* Make a new lookup index for the exponential table */
 306         nflop += table_index("cexp2a*rA*tabscale");
 307         assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 308         extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 309         assign("VVRa","cexp1a*VV");
 310         assign("FFRa","cexp1a*cexp2a*exptabscale*FF");
 311         nflop += 6;
 312       } else {
 313         comment("Repulsion");
 314         extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 315         assign("VVRa","c12a*VV");
 316         assign("FFRa","c12a*tabscale*FF");
 317         nflop += 3;
 318       }
 319     }
 320     do_else(); /* If all A interaction parameters are 0 */
 321     if(DO_COULTAB) {
 322       assign("VVCa","0");
 323       assign("FFCa","0");
 324     }
 325     if(DO_VDWTAB) {
 326       assign("VVDa","0");
 327       assign("FFDa","0");
 328       assign("VVRa","0");
 329       assign("FFRa","0");
 330     }
 331     assign("rinv5a","0");
 332     end_if(); /* finished A */
 333
 334     ifbuf[0]=0;
 335
 336     /* now do B */
 337     if(DO_COULTAB) {
 338       assign("qqB","iqB*%s",ARRAY(chargeB,jnr));
 339       sprintf(ifbuf, bC ? "(qqB != 0)" : "(qqB.ne.0)");
 340     }
 341     if(DO_VDWTAB) {
 342       if(strlen(ifbuf)>0)
 343         strcat(ifbuf, bC ? " || " : ".or.");
 344       strcat(ifbuf, bC ? "(c6b > 0) || " : "(c6b.gt.0).or.");
 345       if(DO_BHAM)
 346         strcat(ifbuf, bC ? "(cexp1b > 0)" : "(cexp1b.gt.0)");
 347       else
 348         strcat(ifbuf, bC ? "(c12b > 0)" : "(c12b.gt.0)");
 349     }
 350
 351     start_if(ifbuf);
 352
 353     assign("rB", bC ? "pow(Alpha*sigma6b*L12+rsix,onesixth)" :
 354            "cpow(Alpha*sigma6b*L12+rsix,onesixth)");
 355     assign("rinvb","1.0/rB");
 356     assign("rinv5b","rinvb*rinvb");
 357     assign("rinv5b","rinv5b*rinv5b*rinvb");
 358     /* do the B table */
 359     comment("Lookup on rB");
 360     nflop += 1 + table_index("rB*tabscale");
 361
 362     assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 363     if(DO_COULTAB) {
 364       comment("Coulomb table");
 365       nflop += extract_table("n1");
 366       assign("VVCb","qqB*VV");
 367       assign("FFCb","qqB*tabscale*FF");
 368       nflop += 4;
 369     }
 370     if(DO_VDWTAB) {
 371       comment("Dispersion");
 372       nflop += extract_table( DO_COULTAB ? "n1+4" : "n1");
 373       assign("VVDb","c6b*VV");
 374       assign("FFDb","c6b*tabscale*FF");
 375       nflop += 3;
 376       if(DO_BHAM) {
 377         comment("Buckingham repulsion");
 378         /* Make a new lookup index for the exponential table */
 379         nflop += 2 + table_index("cexp2b*rB*tabscale");
 380         assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 381         nflop += extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 382         assign("VVRb","cexp1b*VV");
 383         assign("FFRb","cexp1b*cexp2b*exptabscale*FF");
 384         nflop += 4;
 385       } else {
 386         comment("Repulsion");
 387         nflop += extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 388         assign("VVRb","c12b*VV");
 389         assign("FFRb","c12b*tabscale*FF");
 390         nflop += 3;
 391       }
 392     }
 393     do_else(); /* If all B interaction parameters are 0 */
 394     if(DO_COULTAB) {
 395       assign("VVCb","0");
 396       assign("FFCb","0");
 397     }
 398     if(DO_VDWTAB) {
 399       assign("VVDb","0");
 400       assign("FFDb","0");
 401       assign("VVRb","0");
 402       assign("FFRb","0");
 403     }
 404     assign("rinv5b","0");
 405     end_if(); /* finished B */
 406
 407     /* OK. Now we have all potential and force lookup values,
 408      * all that is left is to calculate the actual force, potential
 409      * and dv/dlambda.
 410      */
 411     if(DO_COULTAB) {
 412       add_to_buffer(fabuf,"FFCa");
 413       add_to_buffer(fbbuf,"FFCb");
 414       assign("vcoul","lambda*VVCb+L1*VVCa");
 415       add_to_buffer(dvdlbuf,"VVCb-VVCa");
 416       nflop += 6;
 417     }
 418     if(DO_VDWTAB) {
 419       increment("vnbtot","lambda*(VVDb+VVRb)+L1*(VVDa+VVRa)");
 420       add_to_buffer(dvdlbuf,"VVDb+VVRb-VVDa-VVRa");
 421       add_to_buffer(fabuf,"FFDa+FFRa");
 422       add_to_buffer(fbbuf,"FFDb+FFRb");
 423       nflop += 13;
 424     }
 425     assign("Fa","-(%s)",fabuf);
 426     assign("Fb","-(%s)",fbbuf);
 427     assign("fs%d","(L1*Fa*rinv5a + lambda*Fb*rinv5b)*rfour",ij);
 428     add_to_buffer(dvdlbuf,"onethird*Alpha*lambda*L1*(Fb*sigma6b*rinv5b-Fa*sigma6a*rinv5a)");
 429     increment("dvdl",dvdlbuf);
 430     nflop += 16;
 431     return nflop;
 432 }
 433
 434
 435
 436 int do_table(int i,int j)
 437 {
 438   int ij = 10*i+j;
 439   char qq[100],buf[100];
 440   char f1buf[100],f2buf[100];
 441   int nflop = 0;
 442
 443   f1buf[0]=f2buf[0]=0;
 444
 445   comment("Tabulated interactions");
 446   sprintf(buf,"r%d*tabscale",ij);
 447   nflop += table_index(buf) + 1;
 448
 449   assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 450
 451   if(DO_COULTAB) {
 452     /* determine the name of and set the charge parameter q_i*q_j */
 453     nflop += assign_chargeproduct(i,j,qq);
 454     if(loop.free) {
 455       assign("qqq","L1*qqA + lambda*qqB");
 456       nflop += 3;
 457     }
 458     nflop += extract_table("n1");
 459     assign("vcoul","%s*VV",qq);
 460     nflop ++;
 461     if(DO_FORCE) {
 462       assign("fijC","%s*FF",qq);
 463       nflop ++;
 464     }
 465     if(loop.free) {
 466       increment("dvdl","(qqB - qqA)*VV",j,j);
 467       nflop += 7;
 468     }
 469     if(DO_FORCE) {
 470       add_to_buffer(f1buf,"fijC");
 471       nflop ++;
 472     }
 473   }
 474   /* only do VDW for the first atom in water interactions */
 475   if(DO_VDWTAB &&
 476      (!DO_WATER || (loop.sol==SOL_WATER && i==1) || (i==1 && j==1))) {
 477     /* first determine nonbonded parameters */
 478     if(loop.free) {
 479       assign("tjA","ntiA+%d*%s%s", N_VDWPARAM, ARRAY(type,jnr) , bC ? "" : "+1");
 480       assign("tjB","ntiB+%d*%s%s", N_VDWPARAM, ARRAY(typeB,jnr) , bC ? "" : "+1");
 481       assign("c6a",ARRAY(nbfp,tjA));
 482       assign("c6b",ARRAY(nbfp,tjB));
 483       assign("c6","L1*c6a + lambda*c6b");
 484       nflop += 3;
 485       assign( DO_BHAM ? "cexp1a" : "c12a" ,ARRAY(nbfp,tjA+1));
 486       assign( DO_BHAM ? "cexp1b" : "c12b" ,ARRAY(nbfp,tjB+1));
 487       if(DO_BHAM) {
 488         assign("cexp2a",ARRAY(nbfp,tjA+2));
 489         assign("cexp2b",ARRAY(nbfp,tjB+2));
 490       } else {
 491         assign("c12","L1*c12a + lambda*c12b");
 492         nflop += 3;
 493       }
 494     } else if(loop.sol!=SOL_WATERWATER) {
 495       assign("tjA","ntiA+%d*%s%s", N_VDWPARAM, ARRAY(type,jnr) , bC ? "" : "+1");
 496       assign("c6",ARRAY(nbfp,tjA));
 497       assign( DO_BHAM ? "cexp1" : "c12",ARRAY(nbfp,tjA+1));
 498       if(DO_BHAM)
 499         assign("cexp2",ARRAY(nbfp,tjA+2));
 500     }
 501     comment("Dispersion");
 502     nflop += extract_table( DO_COULTAB ? "n1+4" : "n1");
 503     assign("vnb6","c6*VV");
 504     nflop++;
 505     if(DO_FORCE) {
 506       assign("fijD","c6*FF");
 507       nflop++;
 508     }
 509     if(loop.free) {
 510       increment("dvdl","(c6b - c6a)*VV");
 511       nflop += 3;
 512     }
 513     if(DO_FORCE) {
 514      add_to_buffer(f1buf,"fijD");
 515      nflop++;
 516     }
 517     if(DO_BHAM) {
 518       comment("Buckingham repulsion");
 519       /* Make a new lookup index for the exponential table */
 520       sprintf(buf, loop.free ? "cexp2a*r%d*exptabscale" :
 521               "cexp2*r%d*exptabscale",ij);
 522       nflop += 2 + table_index(buf);
 523       assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 524       nflop += extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 525
 526       if(loop.free) {
 527         assign("vnbexpa","cexp1a*VV");
 528         if(DO_FORCE)
 529           assign("fijRa","cexp1a*cexp2a*FF");
 530         sprintf(buf,"cexp2b*r%d*exptabscale",ij);
 531         nflop += table_index(buf);
 532         assign("n1","%d*n0%s",table_element_size, bC ? "" : "+1");
 533         nflop += extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 534
 535         assign("vnbexpb","cexp1b*VV");
 536         if(DO_FORCE) {
 537           assign("fijRb","cexp1b*cexp2b*FF");
 538           assign("fijR","L1*fijRa + lambda*fijRb");
 539         }
 540         increment("vnbtot","vnb6 + L1*vnbexpa + lambda*vnbexpb");
 541         increment("dvdl","vnbexpb - vnbexpa");
 542         nflop += 18;
 543       } else {
 544         assign("vnbexp","cexp1*VV");
 545         nflop++;
 546         if(DO_FORCE) {
 547           assign("fijR","cexp1*cexp2*FF");
 548           nflop += 2;
 549         }
 550         increment("vnbtot","vnb6 + vnbexp");
 551       }
 552       if(DO_FORCE) {
 553         add_to_buffer(f2buf,"fijR");
 554         nflop++;
 555       }
 556     } else {
 557       comment("Lennard-Jones repulsion");
 558       nflop += extract_table( DO_COULTAB ? "n1+8" : "n1+4");
 559       assign("vnb12","c12*VV");
 560       if(DO_FORCE) {
 561         assign("fijR","c12*FF");
 562         nflop++;
 563       }
 564       increment("vnbtot","vnb6 + vnb12");
 565       if(loop.free) {
 566         increment("dvdl","(c12b - c12a)*VV");
 567         nflop += 3;
 568       }
 569       add_to_buffer(f1buf,"fijR");
 570       nflop += 4;
 571     }
 572   }
 573
 574   if(DO_FORCE) {
 575     sprintf(buf,"(%s)*tabscale",f1buf);
 576     if(strlen(f1buf)) {
 577       add_to_buffer(tabforcebuf,buf);
 578     }
 579     sprintf(buf,"(%s)*exptabscale",f2buf);
 580     if(strlen(f2buf)) {
 581       add_to_buffer(tabforcebuf,buf);
 582     }
 583     nflop+=2;
 584   }
 585   return nflop;
 586 }
 587
 588
 589 int do_coul(int i, int j)
 590 {
 591     int nflop = 0;
 592     char qq[16];
 593     int ij = 10*i+j;
 594
 595     comment("Coulomb");
 596
 597     /* determine the name of and set the charge product iq*jq,
 598      * depending on wheter we do water loops or not. */
 599     nflop += assign_chargeproduct(i,j,qq);
 600
 601     /* this short code is the actual interaction! */
 602     assign("vcoul","%s*rinv%d",qq,ij);
 603     /* concatenate the potential to the force buffer string.
 604      * This is later multiplied by 1/r to get the force once
 605      * all interactions for this pair have been calculated
 606      */
 607     nflop += 1;
 608     if(DO_FORCE) {
 609       nflop ++;
 610       add_to_buffer(forcebuf,"vcoul");
 611     }
 612     return nflop;
 613 }
 614
 615
 616 int do_rf(int i, int j)
 617 {
 618     int nflop = 0;
 619     char qq[16],buf[50];
 620     int ij = 10*i+j;
 621
 622     comment("Reaction field coulomb");
 623
 624     nflop += assign_chargeproduct(i,j,qq);
 625
 626     /* this short code is the actual interaction! */
 627     assign("krsq","krf*rsq%d",ij);
 628     assign("vcoul","%s*(rinv%d+krsq-crf)",qq,ij);
 629
 630     /* concatenate the potential to the force buffer string.
 631      * This is later multiplied by 1/r to get the force once
 632      * all interactions for this pair have been calculated
 633      */
 634     nflop += 4;
 635     if(DO_FORCE) {
 636       sprintf(buf,"%s*(rinv%d-two*krsq)",qq,ij);
 637       add_to_buffer(forcebuf,buf);
 638       nflop += 4;
 639     }
 640     return nflop;
 641 }
 642
 643
 644 int do_lj(int i, int j)
 645 {
 646     int ij = 10*i+j;
 647     int nflop=0;
 648
 649     comment("LJ");
 650     /* calculate r^-6 from r^-2 */
 651     assign("rinvsix","rinvsq%d*rinvsq%d*rinvsq%d",ij,ij,ij);
 652     /* for water-water loops this is only called for O-O interactions,
 653      * and we have already extracted those c6/c12
 654      */
 655     if(loop.sol==SOL_WATERWATER) {
 656       assign("vnb6","c6*rinvsix");
 657       assign("vnb12","c12*rinvsix*rinvsix");
 658     } else {
 659       assign("tjA","ntiA+2*%s%s", ARRAY(type,jnr) , bC ? "" : "+1");
 660       assign("vnb6","rinvsix*%s",ARRAY(nbfp,tjA));
 661       assign("vnb12","rinvsix*rinvsix*%s",ARRAY(nbfp,tjA+1));
 662     }
 663
 664     nflop = 7;
 665     if(DO_FORCE) {
 666       add_to_buffer(forcebuf,"twelve*vnb12-six*vnb6");
 667       nflop += 4;
 668     }
 669     increment("vnbtot","vnb12-vnb6");
 670     return nflop;
 671 }
 672
 673
 674 int do_bham(int i, int j)
 675 {
 676   int ij = 10*i+j;
 677   int nflop=0;
 678
 679   comment("Buckingham");
 680
 681   /* calculate r^-6 from r^-2 */
 682   assign("rinvsix","rinvsq%d*rinvsq%d*rinvsq%d",ij,ij,ij);
 683   if(loop.sol==SOL_WATERWATER) {
 684     assign("vnb6","c6*rinvsix");
 685     assign("br","cexp2*r%d",ij);
 686     assign("vnbexp","cexp1*exp(-br)"); /* V=C_a*exp(-C_b*r) */
 687   } else {
 688     /* fortran indices start at one, instead of 0 for C.
 689      * And there are three nb parameters for buckingham
 690      */
 691     assign("tjA","ntiA+3*%s%s", ARRAY(type,jnr) , bC ? "" : "+1");
 692     assign("vnb6","rinvsix*%s",ARRAY(nbfp,tjA));
 693     assign("br","r%d*%s",ij,ARRAY(nbfp,tjA+2));
 694     assign("vnbexp","exp(-br)*%s",ARRAY(nbfp,tjA+1));
 695   }
 696   nflop=8;
 697   if(DO_FORCE) {
 698     add_to_buffer(forcebuf,"br*vnbexp-six*vnb6");
 699     nflop += 4;
 700   }
 701   increment("vnbtot","vnbexp-vnb6");
 702   return nflop;
 703 }
 704
 705
 706 int calc_interactions(void)
 707 {
 708   /* Remember to set need_invsqrt, need_reciprocal and need_exp
 709    * to the correct needs for your new interaction already
 710    * in mkinl.c. Then add new entries in the switch statements
 711    * and call your interaction implementation in a separate routine.
 712    * Have a look at e.g. do_coul to see how it is done!
 713    */
 714   int nflop = 0;
 715   int i,j,ij;
 716   bool do_vdw;
 717   /* once we get here the distance measures
 718    * we requested in mkinl.c are calculated, but we might
 719    * have to extract them from the buffers
 720    * used for vectorization.
 721    */
 722   for(i=1;i<=loop.ni;i++)
 723     for(j=1;j<=loop.nj;j++) {
 724       ij=10*i+j;
 725       /* Dont do vdw for coul-only water atoms. For the general
 726        * solvent we change the value of loop.vdw outside this loop */
 727       do_vdw = loop.vdw &&
 728         (!DO_WATER || (loop.sol==SOL_WATER && i==1) ||
 729          (loop.sol==SOL_WATERWATER && i==1 && j==1));
 730
 731       forcebuf[0]=tabforcebuf[0]=0;
 732
 733       if(DO_VECTORIZE) {
 734         /* get rinv or rinvsq from temp buffer */
 735         assign( loop.invsqrt ? "rinv%d" : "rinvsq%d",
 736                 OVERWRITE_RSQ ? ARRAY(buf1,m) : ARRAY(buf2,m), ij);
 737         /* Do we need to copy rsq too? */
 738         if((loop.coul_needs_rsq) || (loop.vdw_needs_rsq && do_vdw))
 739           assign( "rsq%d" , ARRAY(buf1,m), ij);
 740         /* And what about r? */
 741         if((loop.coul_needs_r) || (loop.vdw_needs_r && do_vdw))
 742           assign( "r%d" ,"%s*%s",ij,ARRAY(buf1,m),ARRAY(buf2,m));
 743         increment("m","1");
 744       } else { /* no vectorization */
 745         /* If we aren't trying to hide the latency of invsqrt
 746          * we calculate it here just before doing the
 747          * interaction for each pair of atoms!
 748          */
 749         if(opt.delay_invsqrt) {
 750           assign( loop.invsqrt ? "rinv%d" : "rinvsq%d",
 751                   loop.invsqrt ? "1.0/sqrt(rsq%d)" :
 752                   "1.0/rsq%d",ij,ij);
 753           nflop++;
 754         }
 755         if((loop.coul_needs_r) || (loop.vdw_needs_r && do_vdw)) {
 756           assign( "r%d","rsq%d*rinv%d", ij,ij,ij);
 757           nflop++;
 758         }
 759       }
 760       /* Do we need rinvsq when calculating rinv? */
 761       if(((loop.coul_needs_rinvsq) ||
 762           (loop.vdw_needs_rinvsq && do_vdw)) && !loop.recip) {
 763         assign("rinvsq%d","rinv%d*rinv%d",ij,ij,ij);
 764         nflop++;
 765       }
 766       /* which interactions should we call ? */
 767       if(do_vdw)
 768         switch(loop.vdw) {
 769         case VDW_LJ:
 770           nflop += do_lj(i,j);
 771           break;
 772         case VDW_BHAM:
 773           nflop += do_bham(i,j);
 774           break;
 775         case VDW_TAB:
 776         case VDW_BHAMTAB:
 777           nflop += DO_SOFTCORE ? do_softcore(i,j) : do_table(i,j);
 778           break;
 779         default:
 780           comment("No nonbonded interaction");
 781           break;
 782         }
 783
 784       switch(loop.coul) {
 785       case COUL_NORMAL:
 786         nflop += do_coul(i,j);
 787         break;
 788       case COUL_RF:
 789         nflop += do_rf(i,j);
 790         break;
 791       case COUL_TAB:
 792         if(!(do_vdw && DO_VDWTAB))
 793           nflop += DO_SOFTCORE ? do_softcore(i,j) : do_table(i,j);
 794         /* coul table was done simultaneous with
 795          * vdw if the latter was tabulated.
 796          */
 797         break;
 798       default:
 799         comment("No coulomb interaction");
 800         break;
 801       }
 802       /* The total coulomb energy is incremented after calculating
 803        * the scalar force to hide a flop.
 804        */
 805
 806       if(DO_FORCE)
 807         nflop += calc_scalar_force(i,j);
 808
 809       if(loop.coul) {
 810         increment("vctot","vcoul");
 811         nflop ++;
 812       }
 813       if(DO_FORCE)
 814         nflop += update_inner_forces(i,j);
 815     }
 816   return nflop;
 817 }
 818
 819
 820