src/mdlib/vsite.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROwing Monsters And Cloning Shrimps
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <stdio.h>
  40 #include "typedefs.h"
  41 #include "vsite.h"
  42 #include "macros.h"
  43 #include "smalloc.h"
  44 #include "nrnb.h"
  45 #include "vec.h"
  46 #include "mvdata.h"
  47 #include "network.h"
  48 #include "mshift.h"
  49 #include "pbc.h"
  50 #include "domdec.h"
  51 #include "partdec.h"
  52 #include "mtop_util.h"
  53
  54 /* Routines to send/recieve coordinates and force
  55  * of constructing atoms.
  56  */
  57
  58 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
  59 {
  60         rvec *sendbuf;
  61         rvec *recvbuf;
  62         int i,ia;
  63
  64         sendbuf = vsitecomm->send_buf;
  65         recvbuf = vsitecomm->recv_buf;
  66
  67
  68                 /* Prepare pulse left by copying to send buffer */
  69                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
  70                 {
  71                         ia = vsitecomm->left_export_construct[i];
  72                         copy_rvec(x[ia],sendbuf[i]);
  73                 }
  74
  75                 /* Pulse coordinates left */
  76                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
  77
  78                 /* Copy from receive buffer to coordinate array */
  79                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
  80                 {
  81                         ia = vsitecomm->right_import_construct[i];
  82                         copy_rvec(recvbuf[i],x[ia]);
  83                 }
  84
  85                 /* Prepare pulse right by copying to send buffer */
  86                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
  87                 {
  88                         ia = vsitecomm->right_export_construct[i];
  89                         copy_rvec(x[ia],sendbuf[i]);
  90                 }
  91
  92                 /* Pulse coordinates right */
  93                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
  94
  95                 /* Copy from receive buffer to coordinate array */
  96                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
  97                 {
  98                         ia = vsitecomm->left_import_construct[i];
  99                         copy_rvec(recvbuf[i],x[ia]);
 100                 }
 101 }
 102
 103
 104 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 105 {
 106         rvec *sendbuf;
 107         rvec *recvbuf;
 108         int i,ia;
 109
 110         sendbuf = vsitecomm->send_buf;
 111         recvbuf = vsitecomm->recv_buf;
 112
 113                 /* Prepare pulse right by copying to send buffer */
 114                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 115                 {
 116                         ia = vsitecomm->right_import_construct[i];
 117                         copy_rvec(f[ia],sendbuf[i]);
 118                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 119                 }
 120
 121                 /* Pulse forces right */
 122                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
 123
 124                 /* Copy from receive buffer to coordinate array */
 125                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
 126                 {
 127                         ia = vsitecomm->left_export_construct[i];
 128                         rvec_inc(f[ia],recvbuf[i]);
 129                 }
 130
 131                 /* Prepare pulse left by copying to send buffer */
 132                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 133                 {
 134                         ia = vsitecomm->left_import_construct[i];
 135                         copy_rvec(f[ia],sendbuf[i]);
 136                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 137                 }
 138
 139                 /* Pulse coordinates left */
 140                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
 141
 142                 /* Copy from receive buffer to coordinate array */
 143                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
 144                 {
 145                         ia = vsitecomm->right_export_construct[i];
 146                         rvec_inc(f[ia],recvbuf[i]);
 147                 }
 148
 149         /* All forces are now on the home processors */
 150 }
 151
 152
 153 static void
 154 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 155 {
 156         int i,ia;
 157
 158         for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 159         {
 160                 ia = vsitecomm->left_import_construct[i];
 161                 clear_rvec(f[ia]);
 162         }
 163         for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 164         {
 165                 ia = vsitecomm->right_import_construct[i];
 166                 clear_rvec(f[ia]);
 167         }
 168 }
 169
 170
 171
 172 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 173 {
 174   if (pbc) {
 175     return pbc_dx_aiuc(pbc,xi,xj,dx);
 176   }
 177   else {
 178     rvec_sub(xi,xj,dx);
 179     return CENTRAL;
 180   }
 181 }
 182
 183 /* Vsite construction routines */
 184
 185 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
 186 {
 187   real b;
 188   rvec dx;
 189
 190   b=1.0-a;
 191   /* 1 flop */
 192
 193   if (pbc) {
 194     pbc_dx_aiuc(pbc,xj,xi,dx);
 195     x[XX] = xi[XX] + a*dx[XX];
 196     x[YY] = xi[YY] + a*dx[YY];
 197     x[ZZ] = xi[ZZ] + a*dx[ZZ];
 198   } else {
 199     x[XX] = b*xi[XX] + a*xj[XX];
 200     x[YY] = b*xi[YY] + a*xj[YY];
 201     x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 202     /* 9 Flops */
 203   }
 204
 205   /* TOTAL: 10 flops */
 206 }
 207
 208 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 209                           t_pbc *pbc)
 210 {
 211   real c;
 212   rvec dxj,dxk;
 213
 214   c=1.0-a-b;
 215   /* 2 flops */
 216
 217   if (pbc) {
 218     pbc_dx_aiuc(pbc,xj,xi,dxj);
 219     pbc_dx_aiuc(pbc,xk,xi,dxk);
 220     x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 221     x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 222     x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 223   } else {
 224     x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 225     x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 226     x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 227   /* 15 Flops */
 228   }
 229
 230   /* TOTAL: 17 flops */
 231 }
 232
 233 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 234                             t_pbc *pbc)
 235 {
 236   rvec xij,xjk,temp;
 237   real c;
 238
 239   pbc_rvec_sub(pbc,xj,xi,xij);
 240   pbc_rvec_sub(pbc,xk,xj,xjk);
 241   /* 6 flops */
 242
 243   /* temp goes from i to a point on the line jk */
 244   temp[XX] = xij[XX] + a*xjk[XX];
 245   temp[YY] = xij[YY] + a*xjk[YY];
 246   temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 247   /* 6 flops */
 248
 249   c=b*gmx_invsqrt(iprod(temp,temp));
 250   /* 6 + 10 flops */
 251
 252   x[XX] = xi[XX] + c*temp[XX];
 253   x[YY] = xi[YY] + c*temp[YY];
 254   x[ZZ] = xi[ZZ] + c*temp[ZZ];
 255   /* 6 Flops */
 256
 257   /* TOTAL: 34 flops */
 258 }
 259
 260 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
 261 {
 262   rvec xij,xjk,xp;
 263   real a1,b1,c1,invdij;
 264
 265   pbc_rvec_sub(pbc,xj,xi,xij);
 266   pbc_rvec_sub(pbc,xk,xj,xjk);
 267   /* 6 flops */
 268
 269   invdij = gmx_invsqrt(iprod(xij,xij));
 270   c1 = invdij * invdij * iprod(xij,xjk);
 271   xp[XX] = xjk[XX] - c1*xij[XX];
 272   xp[YY] = xjk[YY] - c1*xij[YY];
 273   xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 274   a1 = a*invdij;
 275   b1 = b*gmx_invsqrt(iprod(xp,xp));
 276   /* 45 */
 277
 278   x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 279   x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 280   x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 281   /* 12 Flops */
 282
 283   /* TOTAL: 63 flops */
 284 }
 285
 286 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
 287                              real a,real b,real c,t_pbc *pbc)
 288 {
 289   rvec xij,xik,temp;
 290
 291   pbc_rvec_sub(pbc,xj,xi,xij);
 292   pbc_rvec_sub(pbc,xk,xi,xik);
 293   cprod(xij,xik,temp);
 294   /* 15 Flops */
 295
 296   x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 297   x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 298   x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 299   /* 18 Flops */
 300
 301   /* TOTAL: 33 flops */
 302 }
 303
 304 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 305                             real a,real b,real c,t_pbc *pbc)
 306 {
 307   rvec xij,xjk,xjl,temp;
 308   real d;
 309
 310   pbc_rvec_sub(pbc,xj,xi,xij);
 311   pbc_rvec_sub(pbc,xk,xj,xjk);
 312   pbc_rvec_sub(pbc,xl,xj,xjl);
 313   /* 9 flops */
 314
 315   /* temp goes from i to a point on the plane jkl */
 316   temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 317   temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 318   temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 319   /* 12 flops */
 320
 321   d=c*gmx_invsqrt(iprod(temp,temp));
 322   /* 6 + 10 flops */
 323
 324   x[XX] = xi[XX] + d*temp[XX];
 325   x[YY] = xi[YY] + d*temp[YY];
 326   x[ZZ] = xi[ZZ] + d*temp[ZZ];
 327   /* 6 Flops */
 328
 329   /* TOTAL: 43 flops */
 330 }
 331
 332
 333 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 334                              real a,real b,real c,t_pbc *pbc)
 335 {
 336     rvec xij,xik,xil,ra,rb,rja,rjb,rm;
 337     real d;
 338
 339     pbc_rvec_sub(pbc,xj,xi,xij);
 340     pbc_rvec_sub(pbc,xk,xi,xik);
 341     pbc_rvec_sub(pbc,xl,xi,xil);
 342     /* 9 flops */
 343
 344     ra[XX] = a*xik[XX];
 345     ra[YY] = a*xik[YY];
 346     ra[ZZ] = a*xik[ZZ];
 347
 348     rb[XX] = b*xil[XX];
 349     rb[YY] = b*xil[YY];
 350     rb[ZZ] = b*xil[ZZ];
 351
 352     /* 6 flops */
 353
 354     rvec_sub(ra,xij,rja);
 355     rvec_sub(rb,xij,rjb);
 356     /* 6 flops */
 357
 358     cprod(rja,rjb,rm);
 359     /* 9 flops */
 360
 361     d=c*gmx_invsqrt(norm2(rm));
 362     /* 5+5+1 flops */
 363
 364     x[XX] = xi[XX] + d*rm[XX];
 365     x[YY] = xi[YY] + d*rm[YY];
 366     x[ZZ] = xi[ZZ] + d*rm[ZZ];
 367     /* 6 Flops */
 368
 369     /* TOTAL: 47 flops */
 370 }
 371
 372
 373 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 374                          rvec *x, t_pbc *pbc)
 375 {
 376   rvec xs,x1,dx;
 377   dvec dsum;
 378   int  n3,av,ai,i;
 379   real a;
 380
 381   n3 = 3*ip[ia[0]].vsiten.n;
 382   av = ia[1];
 383   ai = ia[2];
 384   copy_rvec(x[ai],x1);
 385   clear_dvec(dsum);
 386   for(i=3; i<n3; i+=3) {
 387     ai = ia[i+2];
 388     a = ip[ia[i]].vsiten.a;
 389     if (pbc) {
 390       pbc_dx_aiuc(pbc,x[ai],x1,dx);
 391     } else {
 392       rvec_sub(x[ai],x1,dx);
 393     }
 394     dsum[XX] += a*dx[XX];
 395     dsum[YY] += a*dx[YY];
 396     dsum[ZZ] += a*dx[ZZ];
 397     /* 9 Flops */
 398   }
 399
 400   x[av][XX] = x1[XX] + dsum[XX];
 401   x[av][YY] = x1[YY] + dsum[YY];
 402   x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 403
 404   return n3;
 405 }
 406
 407
 408 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
 409                       rvec x[],t_nrnb *nrnb,
 410                       real dt,rvec *v,
 411                       t_iparams ip[],t_ilist ilist[],
 412                       int ePBC,gmx_bool bMolPBC,t_graph *graph,
 413                       t_commrec *cr,matrix box)
 414 {
 415   rvec      xpbc,xv,vv,dx;
 416   real      a1,b1,c1,inv_dt;
 417   int       i,inc,ii,nra,nr,tp,ftype;
 418   t_iatom   avsite,ai,aj,ak,al,pbc_atom;
 419   t_iatom   *ia;
 420   t_pbc     pbc,*pbc_null,*pbc_null2;
 421   gmx_bool      bDomDec;
 422   int       *vsite_pbc,ishift;
 423   rvec      reftmp,vtmp,rtmp;
 424
 425   bDomDec = cr && DOMAINDECOMP(cr);
 426
 427   /* We only need to do pbc when we have inter-cg vsites */
 428   if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite) {
 429     /* This is wasting some CPU time as we now do this multiple times
 430      * per MD step. But how often do we have vsites with full pbc?
 431      */
 432     pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
 433   } else {
 434     pbc_null = NULL;
 435   }
 436
 437   if (cr) {
 438     if (bDomDec) {
 439       dd_move_x_vsites(cr->dd,box,x);
 440     } else if (vsite->bPDvsitecomm) {
 441       /* I'm not sure whether the periodicity and shift are guaranteed
 442        * to be consistent between different nodes when running e.g. polymers
 443        * in parallel. In this special case we thus unshift/shift,
 444        * but only when necessary. This is to make sure the coordinates
 445        * we move don't end up a box away...
 446        */
 447                 if (graph)
 448                         unshift_self(graph,box,x);
 449
 450                 move_construct_x(vsite->vsitecomm,x,cr);
 451
 452                 if (graph)
 453                         shift_self(graph,box,x);
 454     }
 455   }
 456
 457   if (v) {
 458     inv_dt = 1.0/dt;
 459   } else {
 460     inv_dt = 1.0;
 461   }
 462
 463   pbc_null2 = NULL;
 464   for(ftype=0; (ftype<F_NRE); ftype++) {
 465     if (interaction_function[ftype].flags & IF_VSITE) {
 466       nra    = interaction_function[ftype].nratoms;
 467       nr     = ilist[ftype].nr;
 468       ia     = ilist[ftype].iatoms;
 469
 470       if (pbc_null) {
 471         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 472       } else {
 473         vsite_pbc = NULL;
 474       }
 475
 476       for(i=0; (i<nr); ) {
 477         tp   = ia[0];
 478         /*
 479         if (ftype != idef->functype[tp])
 480           gmx_incons("Function types for vsites wrong");
 481         */
 482
 483         /* The vsite and constructing atoms */
 484         avsite = ia[1];
 485         ai   = ia[2];
 486         aj   = ia[3];
 487
 488         /* Constants for constructing vsites */
 489         a1   = ip[tp].vsite.a;
 490         /* Check what kind of pbc we need to use */
 491         if (vsite_pbc) {
 492           pbc_atom = vsite_pbc[i/(1+nra)];
 493           if (pbc_atom > -2) {
 494             if (pbc_atom >= 0) {
 495               /* We need to copy the coordinates here,
 496                * single for single atom cg's pbc_atom is the vsite itself.
 497                */
 498               copy_rvec(x[pbc_atom],xpbc);
 499             }
 500             pbc_null2 = pbc_null;
 501           } else {
 502             pbc_null2 = NULL;
 503           }
 504         } else {
 505           pbc_atom = -2;
 506         }
 507         /* Copy the old position */
 508         copy_rvec(x[avsite],xv);
 509
 510         /* Construct the vsite depending on type */
 511         inc = nra+1;
 512         switch (ftype) {
 513         case F_VSITE2:
 514           constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
 515           break;
 516         case F_VSITE3:
 517           ak = ia[4];
 518           b1 = ip[tp].vsite.b;
 519           constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 520           break;
 521         case F_VSITE3FD:
 522           ak = ia[4];
 523           b1 = ip[tp].vsite.b;
 524           constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 525           break;
 526         case F_VSITE3FAD:
 527           ak = ia[4];
 528           b1 = ip[tp].vsite.b;
 529           constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 530           break;
 531         case F_VSITE3OUT:
 532           ak = ia[4];
 533           b1 = ip[tp].vsite.b;
 534           c1 = ip[tp].vsite.c;
 535           constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
 536           break;
 537         case F_VSITE4FD:
 538           ak = ia[4];
 539           al = ia[5];
 540           b1 = ip[tp].vsite.b;
 541           c1 = ip[tp].vsite.c;
 542           constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 543                           pbc_null2);
 544           break;
 545         case F_VSITE4FDN:
 546           ak = ia[4];
 547           al = ia[5];
 548           b1 = ip[tp].vsite.b;
 549           c1 = ip[tp].vsite.c;
 550           constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 551                         pbc_null2);
 552           break;
 553         case F_VSITEN:
 554           inc = constr_vsiten(ia,ip,x,pbc_null2);
 555           break;
 556         default:
 557           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 558                       ftype,__FILE__,__LINE__);
 559         }
 560
 561         if (pbc_atom >= 0) {
 562           /* Match the pbc of this vsite to the rest of its charge group */
 563           ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
 564           if (ishift != CENTRAL)
 565             rvec_add(xpbc,dx,x[avsite]);
 566         }
 567         if (v) {
 568           /* Calculate velocity of vsite... */
 569           rvec_sub(x[avsite],xv,vv);
 570           svmul(inv_dt,vv,v[avsite]);
 571         }
 572
 573         /* Increment loop variables */
 574         i  += inc;
 575         ia += inc;
 576       }
 577     }
 578   }
 579 }
 580
 581 static void spread_vsite2(t_iatom ia[],real a,
 582                             rvec x[],rvec f[],rvec fshift[],
 583                             t_pbc *pbc,t_graph *g)
 584 {
 585   rvec    fi,fj,dx;
 586   t_iatom av,ai,aj;
 587   ivec    di;
 588   real    b;
 589   int     siv,sij;
 590
 591   av = ia[1];
 592   ai = ia[2];
 593   aj = ia[3];
 594
 595   svmul(1-a,f[av],fi);
 596   svmul(  a,f[av],fj);
 597   /* 7 flop */
 598
 599   rvec_inc(f[ai],fi);
 600   rvec_inc(f[aj],fj);
 601   /* 6 Flops */
 602
 603   if (g) {
 604     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
 605     siv = IVEC2IS(di);
 606     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 607     sij = IVEC2IS(di);
 608   } else if (pbc) {
 609     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 610     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 611   } else {
 612     siv = CENTRAL;
 613     sij = CENTRAL;
 614   }
 615
 616   if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
 617     rvec_inc(fshift[siv],f[av]);
 618     rvec_dec(fshift[CENTRAL],fi);
 619     rvec_dec(fshift[sij],fj);
 620   }
 621
 622   /* TOTAL: 13 flops */
 623 }
 624
 625 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
 626                            gmx_mtop_t *mtop,rvec x[])
 627 {
 628   int as,mb,mol;
 629   gmx_molblock_t *molb;
 630   gmx_moltype_t  *molt;
 631
 632   as = 0;
 633   for(mb=0; mb<mtop->nmolblock; mb++) {
 634     molb = &mtop->molblock[mb];
 635     molt = &mtop->moltype[molb->type];
 636     for(mol=0; mol<molb->nmol; mol++) {
 637       construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
 638                        mtop->ffparams.iparams,molt->ilist,
 639                        epbcNONE,TRUE,NULL,NULL,NULL);
 640       as += molt->atoms.nr;
 641     }
 642   }
 643 }
 644
 645 static void spread_vsite3(t_iatom ia[],real a,real b,
 646                             rvec x[],rvec f[],rvec fshift[],
 647                             t_pbc *pbc,t_graph *g)
 648 {
 649   rvec    fi,fj,fk,dx;
 650   atom_id av,ai,aj,ak;
 651   ivec    di;
 652   int     siv,sij,sik;
 653
 654   av = ia[1];
 655   ai = ia[2];
 656   aj = ia[3];
 657   ak = ia[4];
 658
 659   svmul(1-a-b,f[av],fi);
 660   svmul(    a,f[av],fj);
 661   svmul(    b,f[av],fk);
 662   /* 11 flops */
 663
 664   rvec_inc(f[ai],fi);
 665   rvec_inc(f[aj],fj);
 666   rvec_inc(f[ak],fk);
 667   /* 9 Flops */
 668
 669   if (g) {
 670     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
 671     siv = IVEC2IS(di);
 672     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 673     sij = IVEC2IS(di);
 674     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
 675     sik = IVEC2IS(di);
 676   } else if (pbc) {
 677     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 678     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 679     sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
 680   } else {
 681     siv = CENTRAL;
 682     sij = CENTRAL;
 683     sik = CENTRAL;
 684   }
 685
 686   if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
 687     rvec_inc(fshift[siv],f[av]);
 688     rvec_dec(fshift[CENTRAL],fi);
 689     rvec_dec(fshift[sij],fj);
 690     rvec_dec(fshift[sik],fk);
 691   }
 692
 693   /* TOTAL: 20 flops */
 694 }
 695
 696 static void spread_vsite3FD(t_iatom ia[],real a,real b,
 697                             rvec x[],rvec f[],rvec fshift[],
 698                             t_pbc *pbc,t_graph *g)
 699 {
 700   real fx,fy,fz,c,invl,fproj,a1;
 701   rvec xvi,xij,xjk,xix,fv,temp;
 702   t_iatom av,ai,aj,ak;
 703   int     svi,sji,skj,d;
 704   ivec    di;
 705
 706   av = ia[1];
 707   ai = ia[2];
 708   aj = ia[3];
 709   ak = ia[4];
 710   copy_rvec(f[av],fv);
 711
 712   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 713   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 714   /* 6 flops */
 715
 716   /* xix goes from i to point x on the line jk */
 717   xix[XX]=xij[XX]+a*xjk[XX];
 718   xix[YY]=xij[YY]+a*xjk[YY];
 719   xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
 720   /* 6 flops */
 721
 722   invl=gmx_invsqrt(iprod(xix,xix));
 723   c=b*invl;
 724   /* 4 + ?10? flops */
 725
 726   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 727
 728   temp[XX]=c*(fv[XX]-fproj*xix[XX]);
 729   temp[YY]=c*(fv[YY]-fproj*xix[YY]);
 730   temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
 731   /* 16 */
 732
 733   /* c is already calculated in constr_vsite3FD
 734      storing c somewhere will save 26 flops!     */
 735
 736   a1=1-a;
 737   f[ai][XX] += fv[XX] - temp[XX];
 738   f[ai][YY] += fv[YY] - temp[YY];
 739   f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 740   f[aj][XX] += a1*temp[XX];
 741   f[aj][YY] += a1*temp[YY];
 742   f[aj][ZZ] += a1*temp[ZZ];
 743   f[ak][XX] += a*temp[XX];
 744   f[ak][YY] += a*temp[YY];
 745   f[ak][ZZ] += a*temp[ZZ];
 746   /* 19 Flops */
 747
 748   if (g) {
 749     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 750     svi = IVEC2IS(di);
 751     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 752     sji = IVEC2IS(di);
 753     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 754     skj = IVEC2IS(di);
 755   } else if (pbc) {
 756     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 757   } else {
 758     svi = CENTRAL;
 759   }
 760
 761   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 762     rvec_dec(fshift[svi],fv);
 763     fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 764     fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 765     fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 766     fshift[    sji][XX] += temp[XX];
 767     fshift[    sji][YY] += temp[YY];
 768     fshift[    sji][ZZ] += temp[ZZ];
 769     fshift[    skj][XX] += a*temp[XX];
 770     fshift[    skj][YY] += a*temp[YY];
 771     fshift[    skj][ZZ] += a*temp[ZZ];
 772   }
 773
 774   /* TOTAL: 61 flops */
 775 }
 776
 777 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
 778                              rvec x[],rvec f[],rvec fshift[],
 779                              t_pbc *pbc,t_graph *g)
 780 {
 781   rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
 782   real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
 783   t_iatom av,ai,aj,ak;
 784   int     svi,sji,skj,d;
 785   ivec    di;
 786
 787   av = ia[1];
 788   ai = ia[2];
 789   aj = ia[3];
 790   ak = ia[4];
 791   copy_rvec(f[ia[1]],fv);
 792
 793   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 794   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 795   /* 6 flops */
 796
 797   invdij = gmx_invsqrt(iprod(xij,xij));
 798   invdij2 = invdij * invdij;
 799   c1 = iprod(xij,xjk) * invdij2;
 800   xperp[XX] = xjk[XX] - c1*xij[XX];
 801   xperp[YY] = xjk[YY] - c1*xij[YY];
 802   xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 803   /* xperp in plane ijk, perp. to ij */
 804   invdp = gmx_invsqrt(iprod(xperp,xperp));
 805   a1 = a*invdij;
 806   b1 = b*invdp;
 807   /* 45 flops */
 808
 809   /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 810      storing them somewhere will save 45 flops!     */
 811
 812   fproj=iprod(xij  ,fv)*invdij2;
 813   svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
 814   svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
 815   svmul(b1*fproj,                   xperp,f3);
 816   /* 23 flops */
 817
 818   rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
 819   rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
 820   for (d=0; (d<DIM); d++) {
 821     f1[d]*=a1;
 822     f2[d]*=b1;
 823   }
 824   /* 12 flops */
 825
 826   c2=1+c1;
 827   f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 828   f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 829   f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 830   f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 831   f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 832   f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 833   f[ak][XX] +=                      f2[XX];
 834   f[ak][YY] +=                      f2[YY];
 835   f[ak][ZZ] +=                      f2[ZZ];
 836   /* 30 Flops */
 837
 838   if (g) {
 839     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 840     svi = IVEC2IS(di);
 841     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 842     sji = IVEC2IS(di);
 843     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 844     skj = IVEC2IS(di);
 845   } else if (pbc) {
 846     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 847   } else {
 848     svi = CENTRAL;
 849   }
 850
 851   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 852     rvec_dec(fshift[svi],fv);
 853     fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 854     fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 855     fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 856     fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 857     fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 858     fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 859     fshift[    skj][XX] +=                          f2[XX];
 860     fshift[    skj][YY] +=                          f2[YY];
 861     fshift[    skj][ZZ] +=                          f2[ZZ];
 862   }
 863
 864   /* TOTAL: 113 flops */
 865 }
 866
 867 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
 868                              rvec x[],rvec f[],rvec fshift[],
 869                              t_pbc *pbc,t_graph *g)
 870 {
 871   rvec    xvi,xij,xik,fv,fj,fk;
 872   real    cfx,cfy,cfz;
 873   atom_id av,ai,aj,ak;
 874   ivec    di;
 875   int     svi,sji,ski;
 876
 877   av = ia[1];
 878   ai = ia[2];
 879   aj = ia[3];
 880   ak = ia[4];
 881
 882   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 883   ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
 884   /* 6 Flops */
 885
 886   copy_rvec(f[av],fv);
 887
 888   cfx = c*fv[XX];
 889   cfy = c*fv[YY];
 890   cfz = c*fv[ZZ];
 891   /* 3 Flops */
 892
 893   fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
 894   fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
 895   fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
 896
 897   fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
 898   fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
 899   fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
 900   /* 30 Flops */
 901
 902   f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
 903   f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
 904   f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 905   rvec_inc(f[aj],fj);
 906   rvec_inc(f[ak],fk);
 907   /* 15 Flops */
 908
 909   if (g) {
 910     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 911     svi = IVEC2IS(di);
 912     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 913     sji = IVEC2IS(di);
 914     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
 915     ski = IVEC2IS(di);
 916   } else if (pbc) {
 917     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 918   } else {
 919     svi = CENTRAL;
 920   }
 921
 922   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
 923     rvec_dec(fshift[svi],fv);
 924     fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
 925     fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
 926     fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 927     rvec_inc(fshift[sji],fj);
 928     rvec_inc(fshift[ski],fk);
 929   }
 930
 931   /* TOTAL: 54 flops */
 932 }
 933
 934 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
 935                             rvec x[],rvec f[],rvec fshift[],
 936                             t_pbc *pbc,t_graph *g)
 937 {
 938   real    d,invl,fproj,a1;
 939   rvec    xvi,xij,xjk,xjl,xix,fv,temp;
 940   atom_id av,ai,aj,ak,al;
 941   ivec    di;
 942   int     svi,sji,skj,slj,m;
 943
 944   av = ia[1];
 945   ai = ia[2];
 946   aj = ia[3];
 947   ak = ia[4];
 948   al = ia[5];
 949
 950   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 951   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 952   slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
 953   /* 9 flops */
 954
 955   /* xix goes from i to point x on the plane jkl */
 956   for(m=0; m<DIM; m++)
 957     xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
 958   /* 12 flops */
 959
 960   invl=gmx_invsqrt(iprod(xix,xix));
 961   d=c*invl;
 962   /* 4 + ?10? flops */
 963
 964   copy_rvec(f[av],fv);
 965
 966   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 967
 968   for(m=0; m<DIM; m++)
 969     temp[m] = d*(fv[m] - fproj*xix[m]);
 970   /* 16 */
 971
 972   /* c is already calculated in constr_vsite3FD
 973      storing c somewhere will save 35 flops!     */
 974
 975   a1 = 1 - a - b;
 976   for(m=0; m<DIM; m++) {
 977     f[ai][m] += fv[m] - temp[m];
 978     f[aj][m] += a1*temp[m];
 979     f[ak][m] += a*temp[m];
 980     f[al][m] += b*temp[m];
 981   }
 982   /* 26 Flops */
 983
 984   if (g) {
 985     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 986     svi = IVEC2IS(di);
 987     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 988     sji = IVEC2IS(di);
 989     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 990     skj = IVEC2IS(di);
 991     ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
 992     slj = IVEC2IS(di);
 993   } else if (pbc) {
 994     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 995   } else {
 996     svi = CENTRAL;
 997   }
 998
 999   if (fshift &&
1000       (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1001     rvec_dec(fshift[svi],fv);
1002     for(m=0; m<DIM; m++) {
1003       fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1004       fshift[    sji][m] += temp[m];
1005       fshift[    skj][m] += a*temp[m];
1006       fshift[    slj][m] += b*temp[m];
1007     }
1008   }
1009
1010   /* TOTAL: 77 flops */
1011 }
1012
1013
1014 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1015                              rvec x[],rvec f[],rvec fshift[],
1016                              t_pbc *pbc,t_graph *g)
1017 {
1018     rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1019     rvec fv,fj,fk,fl;
1020     real invrm,denom;
1021     real cfx,cfy,cfz;
1022     ivec di;
1023     int  av,ai,aj,ak,al;
1024     int  svi,sij,sik,sil;
1025
1026     /* DEBUG: check atom indices */
1027     av = ia[1];
1028     ai = ia[2];
1029     aj = ia[3];
1030     ak = ia[4];
1031     al = ia[5];
1032
1033     copy_rvec(f[av],fv);
1034
1035     sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1036     sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1037     sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1038     /* 9 flops */
1039
1040     ra[XX] = a*xik[XX];
1041     ra[YY] = a*xik[YY];
1042     ra[ZZ] = a*xik[ZZ];
1043
1044     rb[XX] = b*xil[XX];
1045     rb[YY] = b*xil[YY];
1046     rb[ZZ] = b*xil[ZZ];
1047
1048     /* 6 flops */
1049
1050     rvec_sub(ra,xij,rja);
1051     rvec_sub(rb,xij,rjb);
1052     rvec_sub(rb,ra,rab);
1053     /* 9 flops */
1054
1055     cprod(rja,rjb,rm);
1056     /* 9 flops */
1057
1058     invrm=gmx_invsqrt(norm2(rm));
1059     denom=invrm*invrm;
1060     /* 5+5+2 flops */
1061
1062     cfx = c*invrm*fv[XX];
1063     cfy = c*invrm*fv[YY];
1064     cfz = c*invrm*fv[ZZ];
1065     /* 6 Flops */
1066
1067     cprod(rm,rab,rt);
1068     /* 9 flops */
1069
1070     rt[XX] *= denom;
1071     rt[YY] *= denom;
1072     rt[ZZ] *= denom;
1073     /* 3flops */
1074
1075     fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1076     fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1077     fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
1078     /* 30 flops */
1079
1080     cprod(rjb,rm,rt);
1081     /* 9 flops */
1082
1083     rt[XX] *= denom*a;
1084     rt[YY] *= denom*a;
1085     rt[ZZ] *= denom*a;
1086     /* 3flops */
1087
1088     fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1089     fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1090     fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1091     /* 36 flops */
1092
1093     cprod(rm,rja,rt);
1094     /* 9 flops */
1095
1096     rt[XX] *= denom*b;
1097     rt[YY] *= denom*b;
1098     rt[ZZ] *= denom*b;
1099     /* 3flops */
1100
1101     fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1102     fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1103     fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1104     /* 36 flops */
1105
1106     f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1107     f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1108     f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1109     rvec_inc(f[aj],fj);
1110     rvec_inc(f[ak],fk);
1111     rvec_inc(f[al],fl);
1112     /* 21 flops */
1113
1114     if (g) {
1115         ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1116         svi = IVEC2IS(di);
1117         ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1118         sij = IVEC2IS(di);
1119         ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1120         sik = IVEC2IS(di);
1121         ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1122         sil = IVEC2IS(di);
1123     } else if (pbc) {
1124         svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1125     } else {
1126         svi = CENTRAL;
1127     }
1128
1129     if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1130         rvec_dec(fshift[svi],fv);
1131         fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1132         fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1133         fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1134         rvec_inc(fshift[sij],fj);
1135         rvec_inc(fshift[sik],fk);
1136         rvec_inc(fshift[sil],fl);
1137     }
1138
1139     /* Total: 207 flops (Yuck!) */
1140 }
1141
1142
1143 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1144                          rvec x[],rvec f[],rvec fshift[],
1145                          t_pbc *pbc,t_graph *g)
1146 {
1147   rvec xv,dx,fi;
1148   int  n3,av,i,ai;
1149   real a;
1150   ivec di;
1151   int  siv;
1152
1153   n3 = 3*ip[ia[0]].vsiten.n;
1154   av = ia[1];
1155   copy_rvec(x[av],xv);
1156
1157   for(i=0; i<n3; i+=3) {
1158     ai = ia[i+2];
1159     if (g) {
1160       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1161       siv = IVEC2IS(di);
1162     } else if (pbc) {
1163       siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1164     } else {
1165       siv = CENTRAL;
1166     }
1167     a = ip[ia[i]].vsiten.a;
1168     svmul(a,f[av],fi);
1169     rvec_inc(f[ai],fi);
1170     if (fshift && siv != CENTRAL) {
1171       rvec_inc(fshift[siv],fi);
1172       rvec_dec(fshift[CENTRAL],fi);
1173     }
1174     /* 6 Flops */
1175   }
1176
1177   return n3;
1178 }
1179
1180
1181 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1182                     rvec x[],rvec f[],rvec *fshift,
1183                     t_nrnb *nrnb,t_idef *idef,
1184                     int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1185                     t_commrec *cr)
1186 {
1187   real      a1,b1,c1;
1188   int       i,inc,m,nra,nr,tp,ftype;
1189   int       nd2,nd3,nd3FD,nd3FAD,nd3OUT,nd4FD,nd4FDN,ndN;
1190   t_iatom   *ia;
1191   t_iparams *ip;
1192   t_pbc     pbc,*pbc_null,*pbc_null2;
1193   int       *vsite_pbc;
1194
1195   /* We only need to do pbc when we have inter-cg vsites */
1196   if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite) {
1197     /* This is wasting some CPU time as we now do this multiple times
1198      * per MD step. But how often do we have vsites with full pbc?
1199      */
1200     pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1201   } else {
1202     pbc_null = NULL;
1203   }
1204
1205   if (DOMAINDECOMP(cr))
1206   {
1207     dd_clear_f_vsites(cr->dd,f);
1208   }
1209   else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1210   {
1211     pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1212   }
1213
1214   ip     = idef->iparams;
1215
1216   nd2        = 0;
1217   nd3        = 0;
1218   nd3FD      = 0;
1219   nd3FAD     = 0;
1220   nd3OUT     = 0;
1221   nd4FD      = 0;
1222   nd4FDN     = 0;
1223   ndN        = 0;
1224
1225   /* this loop goes backwards to be able to build *
1226    * higher type vsites from lower types         */
1227   pbc_null2 = NULL;
1228   for(ftype=F_NRE-1; (ftype>=0); ftype--) {
1229     if (interaction_function[ftype].flags & IF_VSITE) {
1230       nra    = interaction_function[ftype].nratoms;
1231       nr     = idef->il[ftype].nr;
1232       ia     = idef->il[ftype].iatoms;
1233
1234       if (pbc_null) {
1235         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1236       } else {
1237         vsite_pbc = NULL;
1238       }
1239
1240       for(i=0; (i<nr); ) {
1241         /* Check if we need to apply pbc for this vsite */
1242         if (vsite_pbc) {
1243           if (vsite_pbc[i/(1+nra)] > -2)
1244             pbc_null2 = pbc_null;
1245           else
1246             pbc_null2 = NULL;
1247         }
1248
1249         tp   = ia[0];
1250         if (ftype != idef->functype[tp])
1251           gmx_incons("Functiontypes for vsites wrong");
1252
1253         /* Constants for constructing */
1254         a1   = ip[tp].vsite.a;
1255         /* Construct the vsite depending on type */
1256         inc = nra+1;
1257         switch (ftype) {
1258         case F_VSITE2:
1259           spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1260           nd2++;
1261           break;
1262         case F_VSITE3:
1263           b1 = ip[tp].vsite.b;
1264           spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1265           nd3++;
1266           break;
1267         case F_VSITE3FD:
1268           b1 = ip[tp].vsite.b;
1269           spread_vsite3FD(ia,a1,b1,x,f,fshift,pbc_null2,g);
1270           nd3FD++;
1271           break;
1272         case F_VSITE3FAD:
1273           b1 = ip[tp].vsite.b;
1274           spread_vsite3FAD(ia,a1,b1,x,f,fshift,pbc_null2,g);
1275           nd3FAD++;
1276           break;
1277         case F_VSITE3OUT:
1278           b1 = ip[tp].vsite.b;
1279           c1 = ip[tp].vsite.c;
1280           spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,pbc_null2,g);
1281           nd3OUT++;
1282           break;
1283         case F_VSITE4FD:
1284           b1 = ip[tp].vsite.b;
1285           c1 = ip[tp].vsite.c;
1286           spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,pbc_null2,g);
1287           nd4FD++;
1288           break;
1289         case F_VSITE4FDN:
1290           b1 = ip[tp].vsite.b;
1291           c1 = ip[tp].vsite.c;
1292           spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,pbc_null2,g);
1293           nd4FDN++;
1294           break;
1295         case F_VSITEN:
1296           inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1297           ndN += inc;
1298           break;
1299         default:
1300           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1301                       ftype,__FILE__,__LINE__);
1302         }
1303         clear_rvec(f[ia[1]]);
1304
1305         /* Increment loop variables */
1306         i  += inc;
1307         ia += inc;
1308       }
1309     }
1310   }
1311
1312   inc_nrnb(nrnb,eNR_VSITE2,   nd2     );
1313   inc_nrnb(nrnb,eNR_VSITE3,   nd3     );
1314   inc_nrnb(nrnb,eNR_VSITE3FD, nd3FD   );
1315   inc_nrnb(nrnb,eNR_VSITE3FAD,nd3FAD  );
1316   inc_nrnb(nrnb,eNR_VSITE3OUT,nd3OUT  );
1317   inc_nrnb(nrnb,eNR_VSITE4FD, nd4FD   );
1318   inc_nrnb(nrnb,eNR_VSITE4FDN,nd4FDN  );
1319   inc_nrnb(nrnb,eNR_VSITEN,   ndN     );
1320
1321   if (DOMAINDECOMP(cr)) {
1322     dd_move_f_vsites(cr->dd,f,fshift);
1323   } else if (vsite->bPDvsitecomm) {
1324     /* We only move forces here, and they are independent of shifts */
1325     move_construct_f(vsite->vsitecomm,f,cr);
1326   }
1327 }
1328
1329 static int *atom2cg(t_block *cgs)
1330 {
1331   int *a2cg,cg,i;
1332
1333   snew(a2cg,cgs->index[cgs->nr]);
1334   for(cg=0; cg<cgs->nr; cg++) {
1335     for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1336       a2cg[i] = cg;
1337   }
1338
1339   return a2cg;
1340 }
1341
1342 static int count_intercg_vsite(gmx_mtop_t *mtop)
1343 {
1344   int  mb,mt,ftype,nral,i,cg,a;
1345   gmx_molblock_t *molb;
1346   gmx_moltype_t *molt;
1347   int  *a2cg;
1348   t_ilist *il;
1349   t_iatom *ia;
1350   int  n_intercg_vsite;
1351
1352   n_intercg_vsite = 0;
1353   for(mb=0; mb<mtop->nmolblock; mb++) {
1354     molb = &mtop->molblock[mb];
1355     molt = &mtop->moltype[molb->type];
1356     a2cg = atom2cg(&molt->cgs);
1357     for(ftype=0; ftype<F_NRE; ftype++) {
1358       if (interaction_function[ftype].flags & IF_VSITE) {
1359         nral = NRAL(ftype);
1360         il = &molt->ilist[ftype];
1361         ia  = il->iatoms;
1362         for(i=0; i<il->nr; i+=1+nral) {
1363           cg = a2cg[ia[1+i]];
1364           for(a=1; a<nral; a++) {
1365             if (a2cg[ia[1+a]] != cg) {
1366               n_intercg_vsite += molb->nmol;
1367               break;
1368             }
1369           }
1370         }
1371       }
1372     }
1373     sfree(a2cg);
1374   }
1375
1376   return n_intercg_vsite;
1377 }
1378
1379 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1380                            t_atom *atom,t_mdatoms *md,
1381                            t_block *cgs,int *a2cg)
1382 {
1383   int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1384   t_ilist *il;
1385   t_iatom *ia;
1386   int  **vsite_pbc,*vsite_pbc_f;
1387   char *pbc_set;
1388   gmx_bool bViteOnlyCG_and_FirstAtom;
1389
1390   /* Make an array that tells if the pbc of an atom is set */
1391   snew(pbc_set,cgs->index[cgs->nr]);
1392   /* PBC is set for all non vsites */
1393   for(a=0; a<cgs->index[cgs->nr]; a++) {
1394     if ((atom && atom[a].ptype != eptVSite) ||
1395         (md   && md->ptype[a]  != eptVSite)) {
1396       pbc_set[a] = 1;
1397     }
1398   }
1399
1400   snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1401
1402   for(ftype=0; ftype<F_NRE; ftype++) {
1403     if (interaction_function[ftype].flags & IF_VSITE) {
1404       nral = NRAL(ftype);
1405       il = &ilist[ftype];
1406       ia  = il->iatoms;
1407
1408       snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1409       vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1410
1411       i = 0;
1412       while (i < il->nr) {
1413         vsi = i/(1+nral);
1414         vsite = ia[i+1];
1415         cg_v = a2cg[vsite];
1416         /* A value of -2 signals that this vsite and its contructing
1417          * atoms are all within the same cg, so no pbc is required.
1418          */
1419         vsite_pbc_f[vsi] = -2;
1420         /* Check if constructing atoms are outside the vsite's cg */
1421         nc3 = 0;
1422         if (ftype == F_VSITEN) {
1423           nc3 = 3*iparams[ia[i]].vsiten.n;
1424           for(j=0; j<nc3; j+=3) {
1425             if (a2cg[ia[i+j+2]] != cg_v)
1426               vsite_pbc_f[vsi] = -1;
1427           }
1428         } else {
1429           for(a=1; a<nral; a++) {
1430             if (a2cg[ia[i+1+a]] != cg_v)
1431               vsite_pbc_f[vsi] = -1;
1432           }
1433         }
1434         if (vsite_pbc_f[vsi] == -1) {
1435           /* Check if this is the first processed atom of a vsite only cg */
1436           bViteOnlyCG_and_FirstAtom = TRUE;
1437           for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1438             /* Non-vsites already have pbc set, so simply check for pbc_set */
1439             if (pbc_set[a]) {
1440               bViteOnlyCG_and_FirstAtom = FALSE;
1441               break;
1442             }
1443           }
1444           if (bViteOnlyCG_and_FirstAtom) {
1445             /* First processed atom of a vsite only charge group.
1446              * The pbc of the input coordinates to construct_vsites
1447              * should be preserved.
1448              */
1449             vsite_pbc_f[vsi] = vsite;
1450           } else if (cg_v != a2cg[ia[1+i+1]]) {
1451             /* This vsite has a different charge group index
1452              * than it's first constructing atom
1453              * and the charge group has more than one atom,
1454              * search for the first normal particle
1455              * or vsite that already had its pbc defined.
1456              * If nothing is found, use full pbc for this vsite.
1457              */
1458             for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1459               if (a != vsite && pbc_set[a]) {
1460                 vsite_pbc_f[vsi] = a;
1461                 if (gmx_debug_at)
1462                   fprintf(debug,"vsite %d match pbc with atom %d\n",
1463                           vsite+1,a+1);
1464                 break;
1465               }
1466             }
1467             if (gmx_debug_at)
1468               fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
1469                       vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1470                       vsite_pbc_f[vsi]+1);
1471           }
1472         }
1473         if (ftype == F_VSITEN) {
1474           /* The other entries in vsite_pbc_f are not used for center vsites */
1475           i += nc3;
1476         } else {
1477           i += 1+nral;
1478         }
1479
1480         /* This vsite now has its pbc defined */
1481         pbc_set[vsite] = 1;
1482       }
1483     }
1484   }
1485
1486   sfree(pbc_set);
1487
1488   return vsite_pbc;
1489 }
1490
1491 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr)
1492 {
1493   int nvsite,i;
1494   int *a2cg,cg;
1495   gmx_vsite_t *vsite;
1496   int mt;
1497   gmx_moltype_t *molt;
1498
1499   /* check if there are vsites */
1500   nvsite = 0;
1501   for(i=0; i<F_NRE; i++) {
1502     if (interaction_function[i].flags & IF_VSITE) {
1503       nvsite += gmx_mtop_ftype_count(mtop,i);
1504     }
1505   }
1506
1507   if (nvsite == 0) {
1508     return NULL;
1509   }
1510
1511   snew(vsite,1);
1512
1513   vsite->n_intercg_vsite = count_intercg_vsite(mtop);
1514
1515   if (vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr)) {
1516     vsite->nvsite_pbc_molt = mtop->nmoltype;
1517     snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1518     for(mt=0; mt<mtop->nmoltype; mt++) {
1519       molt = &mtop->moltype[mt];
1520       /* Make an atom to charge group index */
1521       a2cg = atom2cg(&molt->cgs);
1522       vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1523                                                 molt->ilist,
1524                                                 molt->atoms.atom,NULL,
1525                                                 &molt->cgs,a2cg);
1526       sfree(a2cg);
1527     }
1528
1529     snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1530     snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
1531   }
1532
1533
1534   return vsite;
1535 }
1536
1537 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
1538                    t_commrec *cr)
1539 {
1540   int *a2cg;
1541
1542   /* Make an atom to charge group index */
1543   a2cg = atom2cg(&top->cgs);
1544
1545   if (vsite->n_intercg_vsite > 0) {
1546     vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
1547                                          top->idef.il,NULL,md,
1548                                          &top->cgs,a2cg);
1549
1550     if (PARTDECOMP(cr)) {
1551       snew(vsite->vsitecomm,1);
1552                 vsite->bPDvsitecomm =
1553                 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
1554     }
1555   }
1556
1557   sfree(a2cg);
1558 }