src/mdlib/vsite.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <stdio.h>
  41 #include "typedefs.h"
  42 #include "vsite.h"
  43 #include "macros.h"
  44 #include "smalloc.h"
  45 #include "nrnb.h"
  46 #include "vec.h"
  47 #include "mvdata.h"
  48 #include "network.h"
  49 #include "mshift.h"
  50 #include "pbc.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54
  55 /* Routines to send/recieve coordinates and force
  56  * of constructing atoms.
  57  */
  58
  59 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
  60 {
  61         rvec *sendbuf;
  62         rvec *recvbuf;
  63         int i,ia;
  64
  65         sendbuf = vsitecomm->send_buf;
  66         recvbuf = vsitecomm->recv_buf;
  67
  68
  69                 /* Prepare pulse left by copying to send buffer */
  70                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
  71                 {
  72                         ia = vsitecomm->left_export_construct[i];
  73                         copy_rvec(x[ia],sendbuf[i]);
  74                 }
  75
  76                 /* Pulse coordinates left */
  77                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
  78
  79                 /* Copy from receive buffer to coordinate array */
  80                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
  81                 {
  82                         ia = vsitecomm->right_import_construct[i];
  83                         copy_rvec(recvbuf[i],x[ia]);
  84                 }
  85
  86                 /* Prepare pulse right by copying to send buffer */
  87                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
  88                 {
  89                         ia = vsitecomm->right_export_construct[i];
  90                         copy_rvec(x[ia],sendbuf[i]);
  91                 }
  92
  93                 /* Pulse coordinates right */
  94                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
  95
  96                 /* Copy from receive buffer to coordinate array */
  97                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
  98                 {
  99                         ia = vsitecomm->left_import_construct[i];
 100                         copy_rvec(recvbuf[i],x[ia]);
 101                 }
 102 }
 103
 104
 105 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 106 {
 107         rvec *sendbuf;
 108         rvec *recvbuf;
 109         int i,ia;
 110
 111         sendbuf = vsitecomm->send_buf;
 112         recvbuf = vsitecomm->recv_buf;
 113
 114                 /* Prepare pulse right by copying to send buffer */
 115                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 116                 {
 117                         ia = vsitecomm->right_import_construct[i];
 118                         copy_rvec(f[ia],sendbuf[i]);
 119                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 120                 }
 121
 122                 /* Pulse forces right */
 123                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
 124
 125                 /* Copy from receive buffer to coordinate array */
 126                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
 127                 {
 128                         ia = vsitecomm->left_export_construct[i];
 129                         rvec_inc(f[ia],recvbuf[i]);
 130                 }
 131
 132                 /* Prepare pulse left by copying to send buffer */
 133                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 134                 {
 135                         ia = vsitecomm->left_import_construct[i];
 136                         copy_rvec(f[ia],sendbuf[i]);
 137                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 138                 }
 139
 140                 /* Pulse coordinates left */
 141                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
 142
 143                 /* Copy from receive buffer to coordinate array */
 144                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
 145                 {
 146                         ia = vsitecomm->right_export_construct[i];
 147                         rvec_inc(f[ia],recvbuf[i]);
 148                 }
 149
 150         /* All forces are now on the home processors */
 151 }
 152
 153
 154 static void
 155 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 156 {
 157         int i,ia;
 158
 159         for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 160         {
 161                 ia = vsitecomm->left_import_construct[i];
 162                 clear_rvec(f[ia]);
 163         }
 164         for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 165         {
 166                 ia = vsitecomm->right_import_construct[i];
 167                 clear_rvec(f[ia]);
 168         }
 169 }
 170
 171
 172
 173 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 174 {
 175   if (pbc) {
 176     return pbc_dx_aiuc(pbc,xi,xj,dx);
 177   }
 178   else {
 179     rvec_sub(xi,xj,dx);
 180     return CENTRAL;
 181   }
 182 }
 183
 184 /* Vsite construction routines */
 185
 186 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
 187 {
 188   real b;
 189   rvec dx;
 190
 191   b=1.0-a;
 192   /* 1 flop */
 193
 194   if (pbc) {
 195     pbc_dx_aiuc(pbc,xj,xi,dx);
 196     x[XX] = xi[XX] + a*dx[XX];
 197     x[YY] = xi[YY] + a*dx[YY];
 198     x[ZZ] = xi[ZZ] + a*dx[ZZ];
 199   } else {
 200     x[XX] = b*xi[XX] + a*xj[XX];
 201     x[YY] = b*xi[YY] + a*xj[YY];
 202     x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 203     /* 9 Flops */
 204   }
 205
 206   /* TOTAL: 10 flops */
 207 }
 208
 209 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 210                           t_pbc *pbc)
 211 {
 212   real c;
 213   rvec dxj,dxk;
 214
 215   c=1.0-a-b;
 216   /* 2 flops */
 217
 218   if (pbc) {
 219     pbc_dx_aiuc(pbc,xj,xi,dxj);
 220     pbc_dx_aiuc(pbc,xk,xi,dxk);
 221     x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 222     x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 223     x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 224   } else {
 225     x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 226     x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 227     x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 228   /* 15 Flops */
 229   }
 230
 231   /* TOTAL: 17 flops */
 232 }
 233
 234 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 235                             t_pbc *pbc)
 236 {
 237   rvec xij,xjk,temp;
 238   real c;
 239
 240   pbc_rvec_sub(pbc,xj,xi,xij);
 241   pbc_rvec_sub(pbc,xk,xj,xjk);
 242   /* 6 flops */
 243
 244   /* temp goes from i to a point on the line jk */
 245   temp[XX] = xij[XX] + a*xjk[XX];
 246   temp[YY] = xij[YY] + a*xjk[YY];
 247   temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 248   /* 6 flops */
 249
 250   c=b*gmx_invsqrt(iprod(temp,temp));
 251   /* 6 + 10 flops */
 252
 253   x[XX] = xi[XX] + c*temp[XX];
 254   x[YY] = xi[YY] + c*temp[YY];
 255   x[ZZ] = xi[ZZ] + c*temp[ZZ];
 256   /* 6 Flops */
 257
 258   /* TOTAL: 34 flops */
 259 }
 260
 261 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
 262 {
 263   rvec xij,xjk,xp;
 264   real a1,b1,c1,invdij;
 265
 266   pbc_rvec_sub(pbc,xj,xi,xij);
 267   pbc_rvec_sub(pbc,xk,xj,xjk);
 268   /* 6 flops */
 269
 270   invdij = gmx_invsqrt(iprod(xij,xij));
 271   c1 = invdij * invdij * iprod(xij,xjk);
 272   xp[XX] = xjk[XX] - c1*xij[XX];
 273   xp[YY] = xjk[YY] - c1*xij[YY];
 274   xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 275   a1 = a*invdij;
 276   b1 = b*gmx_invsqrt(iprod(xp,xp));
 277   /* 45 */
 278
 279   x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 280   x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 281   x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 282   /* 12 Flops */
 283
 284   /* TOTAL: 63 flops */
 285 }
 286
 287 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
 288                              real a,real b,real c,t_pbc *pbc)
 289 {
 290   rvec xij,xik,temp;
 291
 292   pbc_rvec_sub(pbc,xj,xi,xij);
 293   pbc_rvec_sub(pbc,xk,xi,xik);
 294   cprod(xij,xik,temp);
 295   /* 15 Flops */
 296
 297   x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 298   x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 299   x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 300   /* 18 Flops */
 301
 302   /* TOTAL: 33 flops */
 303 }
 304
 305 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 306                             real a,real b,real c,t_pbc *pbc)
 307 {
 308   rvec xij,xjk,xjl,temp;
 309   real d;
 310
 311   pbc_rvec_sub(pbc,xj,xi,xij);
 312   pbc_rvec_sub(pbc,xk,xj,xjk);
 313   pbc_rvec_sub(pbc,xl,xj,xjl);
 314   /* 9 flops */
 315
 316   /* temp goes from i to a point on the plane jkl */
 317   temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 318   temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 319   temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 320   /* 12 flops */
 321
 322   d=c*gmx_invsqrt(iprod(temp,temp));
 323   /* 6 + 10 flops */
 324
 325   x[XX] = xi[XX] + d*temp[XX];
 326   x[YY] = xi[YY] + d*temp[YY];
 327   x[ZZ] = xi[ZZ] + d*temp[ZZ];
 328   /* 6 Flops */
 329
 330   /* TOTAL: 43 flops */
 331 }
 332
 333
 334 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 335                              real a,real b,real c,t_pbc *pbc)
 336 {
 337     rvec xij,xik,xil,ra,rb,rja,rjb,rm;
 338     real d;
 339
 340     pbc_rvec_sub(pbc,xj,xi,xij);
 341     pbc_rvec_sub(pbc,xk,xi,xik);
 342     pbc_rvec_sub(pbc,xl,xi,xil);
 343     /* 9 flops */
 344
 345     ra[XX] = a*xik[XX];
 346     ra[YY] = a*xik[YY];
 347     ra[ZZ] = a*xik[ZZ];
 348
 349     rb[XX] = b*xil[XX];
 350     rb[YY] = b*xil[YY];
 351     rb[ZZ] = b*xil[ZZ];
 352
 353     /* 6 flops */
 354
 355     rvec_sub(ra,xij,rja);
 356     rvec_sub(rb,xij,rjb);
 357     /* 6 flops */
 358
 359     cprod(rja,rjb,rm);
 360     /* 9 flops */
 361
 362     d=c*gmx_invsqrt(norm2(rm));
 363     /* 5+5+1 flops */
 364
 365     x[XX] = xi[XX] + d*rm[XX];
 366     x[YY] = xi[YY] + d*rm[YY];
 367     x[ZZ] = xi[ZZ] + d*rm[ZZ];
 368     /* 6 Flops */
 369
 370     /* TOTAL: 47 flops */
 371 }
 372
 373
 374 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 375                          rvec *x, t_pbc *pbc)
 376 {
 377   rvec xs,x1,dx;
 378   dvec dsum;
 379   int  n3,av,ai,i;
 380   real a;
 381
 382   n3 = 3*ip[ia[0]].vsiten.n;
 383   av = ia[1];
 384   ai = ia[2];
 385   copy_rvec(x[ai],x1);
 386   clear_dvec(dsum);
 387   for(i=3; i<n3; i+=3) {
 388     ai = ia[i+2];
 389     a = ip[ia[i]].vsiten.a;
 390     if (pbc) {
 391       pbc_dx_aiuc(pbc,x[ai],x1,dx);
 392     } else {
 393       rvec_sub(x[ai],x1,dx);
 394     }
 395     dsum[XX] += a*dx[XX];
 396     dsum[YY] += a*dx[YY];
 397     dsum[ZZ] += a*dx[ZZ];
 398     /* 9 Flops */
 399   }
 400
 401   x[av][XX] = x1[XX] + dsum[XX];
 402   x[av][YY] = x1[YY] + dsum[YY];
 403   x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 404
 405   return n3;
 406 }
 407
 408
 409 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
 410                       rvec x[],t_nrnb *nrnb,
 411                       real dt,rvec *v,
 412                       t_iparams ip[],t_ilist ilist[],
 413                       int ePBC,gmx_bool bMolPBC,t_graph *graph,
 414                       t_commrec *cr,matrix box)
 415 {
 416   rvec      xpbc,xv,vv,dx;
 417   real      a1,b1,c1,inv_dt;
 418   int       i,inc,ii,nra,nr,tp,ftype;
 419   t_iatom   avsite,ai,aj,ak,al,pbc_atom;
 420   t_iatom   *ia;
 421   t_pbc     pbc,*pbc_null,*pbc_null2;
 422   gmx_bool      bDomDec;
 423   int       *vsite_pbc,ishift;
 424   rvec      reftmp,vtmp,rtmp;
 425
 426   bDomDec = cr && DOMAINDECOMP(cr);
 427
 428   /* We only need to do pbc when we have inter-cg vsites */
 429   if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite) {
 430     /* This is wasting some CPU time as we now do this multiple times
 431      * per MD step. But how often do we have vsites with full pbc?
 432      */
 433     pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
 434   } else {
 435     pbc_null = NULL;
 436   }
 437
 438   if (cr) {
 439     if (bDomDec) {
 440       dd_move_x_vsites(cr->dd,box,x);
 441     } else if (vsite->bPDvsitecomm) {
 442       /* I'm not sure whether the periodicity and shift are guaranteed
 443        * to be consistent between different nodes when running e.g. polymers
 444        * in parallel. In this special case we thus unshift/shift,
 445        * but only when necessary. This is to make sure the coordinates
 446        * we move don't end up a box away...
 447        */
 448                 if (graph)
 449                         unshift_self(graph,box,x);
 450
 451                 move_construct_x(vsite->vsitecomm,x,cr);
 452
 453                 if (graph)
 454                         shift_self(graph,box,x);
 455     }
 456   }
 457
 458   if (v) {
 459     inv_dt = 1.0/dt;
 460   } else {
 461     inv_dt = 1.0;
 462   }
 463
 464   pbc_null2 = NULL;
 465   for(ftype=0; (ftype<F_NRE); ftype++) {
 466     if (interaction_function[ftype].flags & IF_VSITE) {
 467       nra    = interaction_function[ftype].nratoms;
 468       nr     = ilist[ftype].nr;
 469       ia     = ilist[ftype].iatoms;
 470
 471       if (pbc_null) {
 472         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 473       } else {
 474         vsite_pbc = NULL;
 475       }
 476
 477       for(i=0; (i<nr); ) {
 478         tp   = ia[0];
 479         /*
 480         if (ftype != idef->functype[tp])
 481           gmx_incons("Function types for vsites wrong");
 482         */
 483
 484         /* The vsite and constructing atoms */
 485         avsite = ia[1];
 486         ai   = ia[2];
 487         aj   = ia[3];
 488
 489         /* Constants for constructing vsites */
 490         a1   = ip[tp].vsite.a;
 491         /* Check what kind of pbc we need to use */
 492         if (vsite_pbc) {
 493           pbc_atom = vsite_pbc[i/(1+nra)];
 494           if (pbc_atom > -2) {
 495             if (pbc_atom >= 0) {
 496               /* We need to copy the coordinates here,
 497                * single for single atom cg's pbc_atom is the vsite itself.
 498                */
 499               copy_rvec(x[pbc_atom],xpbc);
 500             }
 501             pbc_null2 = pbc_null;
 502           } else {
 503             pbc_null2 = NULL;
 504           }
 505         } else {
 506           pbc_atom = -2;
 507         }
 508         /* Copy the old position */
 509         copy_rvec(x[avsite],xv);
 510
 511         /* Construct the vsite depending on type */
 512         inc = nra+1;
 513         switch (ftype) {
 514         case F_VSITE2:
 515           constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
 516           break;
 517         case F_VSITE3:
 518           ak = ia[4];
 519           b1 = ip[tp].vsite.b;
 520           constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 521           break;
 522         case F_VSITE3FD:
 523           ak = ia[4];
 524           b1 = ip[tp].vsite.b;
 525           constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 526           break;
 527         case F_VSITE3FAD:
 528           ak = ia[4];
 529           b1 = ip[tp].vsite.b;
 530           constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 531           break;
 532         case F_VSITE3OUT:
 533           ak = ia[4];
 534           b1 = ip[tp].vsite.b;
 535           c1 = ip[tp].vsite.c;
 536           constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
 537           break;
 538         case F_VSITE4FD:
 539           ak = ia[4];
 540           al = ia[5];
 541           b1 = ip[tp].vsite.b;
 542           c1 = ip[tp].vsite.c;
 543           constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 544                           pbc_null2);
 545           break;
 546         case F_VSITE4FDN:
 547           ak = ia[4];
 548           al = ia[5];
 549           b1 = ip[tp].vsite.b;
 550           c1 = ip[tp].vsite.c;
 551           constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 552                         pbc_null2);
 553           break;
 554         case F_VSITEN:
 555           inc = constr_vsiten(ia,ip,x,pbc_null2);
 556           break;
 557         default:
 558           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 559                       ftype,__FILE__,__LINE__);
 560         }
 561
 562         if (pbc_atom >= 0) {
 563           /* Match the pbc of this vsite to the rest of its charge group */
 564           ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
 565           if (ishift != CENTRAL)
 566             rvec_add(xpbc,dx,x[avsite]);
 567         }
 568         if (v) {
 569           /* Calculate velocity of vsite... */
 570           rvec_sub(x[avsite],xv,vv);
 571           svmul(inv_dt,vv,v[avsite]);
 572         }
 573
 574         /* Increment loop variables */
 575         i  += inc;
 576         ia += inc;
 577       }
 578     }
 579   }
 580 }
 581
 582 static void spread_vsite2(t_iatom ia[],real a,
 583                             rvec x[],rvec f[],rvec fshift[],
 584                             t_pbc *pbc,t_graph *g)
 585 {
 586   rvec    fi,fj,dx;
 587   t_iatom av,ai,aj;
 588   ivec    di;
 589   real    b;
 590   int     siv,sij;
 591
 592   av = ia[1];
 593   ai = ia[2];
 594   aj = ia[3];
 595
 596   svmul(1-a,f[av],fi);
 597   svmul(  a,f[av],fj);
 598   /* 7 flop */
 599
 600   rvec_inc(f[ai],fi);
 601   rvec_inc(f[aj],fj);
 602   /* 6 Flops */
 603
 604   if (g) {
 605     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
 606     siv = IVEC2IS(di);
 607     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 608     sij = IVEC2IS(di);
 609   } else if (pbc) {
 610     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 611     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 612   } else {
 613     siv = CENTRAL;
 614     sij = CENTRAL;
 615   }
 616
 617   if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
 618     rvec_inc(fshift[siv],f[av]);
 619     rvec_dec(fshift[CENTRAL],fi);
 620     rvec_dec(fshift[sij],fj);
 621   }
 622
 623   /* TOTAL: 13 flops */
 624 }
 625
 626 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
 627                            gmx_mtop_t *mtop,rvec x[])
 628 {
 629   int as,mb,mol;
 630   gmx_molblock_t *molb;
 631   gmx_moltype_t  *molt;
 632
 633   as = 0;
 634   for(mb=0; mb<mtop->nmolblock; mb++) {
 635     molb = &mtop->molblock[mb];
 636     molt = &mtop->moltype[molb->type];
 637     for(mol=0; mol<molb->nmol; mol++) {
 638       construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
 639                        mtop->ffparams.iparams,molt->ilist,
 640                        epbcNONE,TRUE,NULL,NULL,NULL);
 641       as += molt->atoms.nr;
 642     }
 643   }
 644 }
 645
 646 static void spread_vsite3(t_iatom ia[],real a,real b,
 647                             rvec x[],rvec f[],rvec fshift[],
 648                             t_pbc *pbc,t_graph *g)
 649 {
 650   rvec    fi,fj,fk,dx;
 651   atom_id av,ai,aj,ak;
 652   ivec    di;
 653   int     siv,sij,sik;
 654
 655   av = ia[1];
 656   ai = ia[2];
 657   aj = ia[3];
 658   ak = ia[4];
 659
 660   svmul(1-a-b,f[av],fi);
 661   svmul(    a,f[av],fj);
 662   svmul(    b,f[av],fk);
 663   /* 11 flops */
 664
 665   rvec_inc(f[ai],fi);
 666   rvec_inc(f[aj],fj);
 667   rvec_inc(f[ak],fk);
 668   /* 9 Flops */
 669
 670   if (g) {
 671     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
 672     siv = IVEC2IS(di);
 673     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 674     sij = IVEC2IS(di);
 675     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
 676     sik = IVEC2IS(di);
 677   } else if (pbc) {
 678     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 679     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 680     sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
 681   } else {
 682     siv = CENTRAL;
 683     sij = CENTRAL;
 684     sik = CENTRAL;
 685   }
 686
 687   if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
 688     rvec_inc(fshift[siv],f[av]);
 689     rvec_dec(fshift[CENTRAL],fi);
 690     rvec_dec(fshift[sij],fj);
 691     rvec_dec(fshift[sik],fk);
 692   }
 693
 694   /* TOTAL: 20 flops */
 695 }
 696
 697 static void spread_vsite3FD(t_iatom ia[],real a,real b,
 698                             rvec x[],rvec f[],rvec fshift[],
 699                             gmx_bool VirCorr,matrix dxdf,
 700                             t_pbc *pbc,t_graph *g)
 701 {
 702   real fx,fy,fz,c,invl,fproj,a1;
 703   rvec xvi,xij,xjk,xix,fv,temp;
 704   t_iatom av,ai,aj,ak;
 705   int     svi,sji,skj,d;
 706   ivec    di;
 707
 708   av = ia[1];
 709   ai = ia[2];
 710   aj = ia[3];
 711   ak = ia[4];
 712   copy_rvec(f[av],fv);
 713
 714   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 715   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 716   /* 6 flops */
 717
 718   /* xix goes from i to point x on the line jk */
 719   xix[XX]=xij[XX]+a*xjk[XX];
 720   xix[YY]=xij[YY]+a*xjk[YY];
 721   xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
 722   /* 6 flops */
 723
 724   invl=gmx_invsqrt(iprod(xix,xix));
 725   c=b*invl;
 726   /* 4 + ?10? flops */
 727
 728   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 729
 730   temp[XX]=c*(fv[XX]-fproj*xix[XX]);
 731   temp[YY]=c*(fv[YY]-fproj*xix[YY]);
 732   temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
 733   /* 16 */
 734
 735   /* c is already calculated in constr_vsite3FD
 736      storing c somewhere will save 26 flops!     */
 737
 738   a1=1-a;
 739   f[ai][XX] += fv[XX] - temp[XX];
 740   f[ai][YY] += fv[YY] - temp[YY];
 741   f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 742   f[aj][XX] += a1*temp[XX];
 743   f[aj][YY] += a1*temp[YY];
 744   f[aj][ZZ] += a1*temp[ZZ];
 745   f[ak][XX] += a*temp[XX];
 746   f[ak][YY] += a*temp[YY];
 747   f[ak][ZZ] += a*temp[ZZ];
 748   /* 19 Flops */
 749
 750   if (g) {
 751     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 752     svi = IVEC2IS(di);
 753     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 754     sji = IVEC2IS(di);
 755     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 756     skj = IVEC2IS(di);
 757   } else if (pbc) {
 758     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 759   } else {
 760     svi = CENTRAL;
 761   }
 762
 763   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 764     rvec_dec(fshift[svi],fv);
 765     fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 766     fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 767     fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 768     fshift[    sji][XX] += temp[XX];
 769     fshift[    sji][YY] += temp[YY];
 770     fshift[    sji][ZZ] += temp[ZZ];
 771     fshift[    skj][XX] += a*temp[XX];
 772     fshift[    skj][YY] += a*temp[YY];
 773     fshift[    skj][ZZ] += a*temp[ZZ];
 774   }
 775
 776     if (VirCorr)
 777     {
 778         /* When VirCorr=TRUE, the virial for the current forces is not
 779          * calculated from the redistributed forces. This means that
 780          * the effect of non-linear virtual site constructions on the virial
 781          * needs to be added separately. This contribution can be calculated
 782          * in many ways, but the simplest and cheapest way is to use
 783          * the first constructing atom ai as a reference position in space:
 784          * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
 785          */
 786         rvec xiv;
 787         int  i,j;
 788
 789         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 790
 791         for(i=0; i<DIM; i++)
 792         {
 793             for(j=0; j<DIM; j++)
 794             {
 795                 /* As xix is a linear combination of j and k, use that here */
 796                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 797             }
 798         }
 799     }
 800
 801   /* TOTAL: 61 flops */
 802 }
 803
 804 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
 805                              rvec x[],rvec f[],rvec fshift[],
 806                              gmx_bool VirCorr,matrix dxdf,
 807                              t_pbc *pbc,t_graph *g)
 808 {
 809   rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
 810   real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
 811   t_iatom av,ai,aj,ak;
 812   int     svi,sji,skj,d;
 813   ivec    di;
 814
 815   av = ia[1];
 816   ai = ia[2];
 817   aj = ia[3];
 818   ak = ia[4];
 819   copy_rvec(f[ia[1]],fv);
 820
 821   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 822   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 823   /* 6 flops */
 824
 825   invdij = gmx_invsqrt(iprod(xij,xij));
 826   invdij2 = invdij * invdij;
 827   c1 = iprod(xij,xjk) * invdij2;
 828   xperp[XX] = xjk[XX] - c1*xij[XX];
 829   xperp[YY] = xjk[YY] - c1*xij[YY];
 830   xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 831   /* xperp in plane ijk, perp. to ij */
 832   invdp = gmx_invsqrt(iprod(xperp,xperp));
 833   a1 = a*invdij;
 834   b1 = b*invdp;
 835   /* 45 flops */
 836
 837   /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 838      storing them somewhere will save 45 flops!     */
 839
 840   fproj=iprod(xij  ,fv)*invdij2;
 841   svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
 842   svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
 843   svmul(b1*fproj,                   xperp,f3);
 844   /* 23 flops */
 845
 846   rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
 847   rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
 848   for (d=0; (d<DIM); d++) {
 849     f1[d]*=a1;
 850     f2[d]*=b1;
 851   }
 852   /* 12 flops */
 853
 854   c2=1+c1;
 855   f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 856   f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 857   f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 858   f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 859   f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 860   f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 861   f[ak][XX] +=                      f2[XX];
 862   f[ak][YY] +=                      f2[YY];
 863   f[ak][ZZ] +=                      f2[ZZ];
 864   /* 30 Flops */
 865
 866   if (g) {
 867     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 868     svi = IVEC2IS(di);
 869     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 870     sji = IVEC2IS(di);
 871     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 872     skj = IVEC2IS(di);
 873   } else if (pbc) {
 874     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 875   } else {
 876     svi = CENTRAL;
 877   }
 878
 879   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 880     rvec_dec(fshift[svi],fv);
 881     fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 882     fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 883     fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 884     fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 885     fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 886     fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 887     fshift[    skj][XX] +=                          f2[XX];
 888     fshift[    skj][YY] +=                          f2[YY];
 889     fshift[    skj][ZZ] +=                          f2[ZZ];
 890   }
 891
 892     if (VirCorr)
 893     {
 894         rvec xiv;
 895         int  i,j;
 896
 897         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 898
 899         for(i=0; i<DIM; i++)
 900         {
 901             for(j=0; j<DIM; j++)
 902             {
 903                 /* Note that xik=xij+xjk, so we have to add xij*f2 */
 904                 dxdf[i][j] +=
 905                     - xiv[i]*fv[j]
 906                     + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
 907                     + xjk[i]*f2[j];
 908             }
 909         }
 910     }
 911
 912   /* TOTAL: 113 flops */
 913 }
 914
 915 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
 916                              rvec x[],rvec f[],rvec fshift[],
 917                              gmx_bool VirCorr,matrix dxdf,
 918                              t_pbc *pbc,t_graph *g)
 919 {
 920   rvec    xvi,xij,xik,fv,fj,fk;
 921   real    cfx,cfy,cfz;
 922   atom_id av,ai,aj,ak;
 923   ivec    di;
 924   int     svi,sji,ski;
 925
 926   av = ia[1];
 927   ai = ia[2];
 928   aj = ia[3];
 929   ak = ia[4];
 930
 931   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 932   ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
 933   /* 6 Flops */
 934
 935   copy_rvec(f[av],fv);
 936
 937   cfx = c*fv[XX];
 938   cfy = c*fv[YY];
 939   cfz = c*fv[ZZ];
 940   /* 3 Flops */
 941
 942   fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
 943   fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
 944   fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
 945
 946   fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
 947   fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
 948   fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
 949   /* 30 Flops */
 950
 951   f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
 952   f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
 953   f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 954   rvec_inc(f[aj],fj);
 955   rvec_inc(f[ak],fk);
 956   /* 15 Flops */
 957
 958   if (g) {
 959     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 960     svi = IVEC2IS(di);
 961     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 962     sji = IVEC2IS(di);
 963     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
 964     ski = IVEC2IS(di);
 965   } else if (pbc) {
 966     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 967   } else {
 968     svi = CENTRAL;
 969   }
 970
 971   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
 972     rvec_dec(fshift[svi],fv);
 973     fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
 974     fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
 975     fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 976     rvec_inc(fshift[sji],fj);
 977     rvec_inc(fshift[ski],fk);
 978   }
 979
 980     if (VirCorr)
 981     {
 982         rvec xiv;
 983         int  i,j;
 984
 985         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 986
 987         for(i=0; i<DIM; i++)
 988         {
 989             for(j=0; j<DIM; j++)
 990             {
 991                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
 992             }
 993         }
 994     }
 995
 996   /* TOTAL: 54 flops */
 997 }
 998
 999 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1000                             rvec x[],rvec f[],rvec fshift[],
1001                             gmx_bool VirCorr,matrix dxdf,
1002                             t_pbc *pbc,t_graph *g)
1003 {
1004   real    d,invl,fproj,a1;
1005   rvec    xvi,xij,xjk,xjl,xix,fv,temp;
1006   atom_id av,ai,aj,ak,al;
1007   ivec    di;
1008   int     svi,sji,skj,slj,m;
1009
1010   av = ia[1];
1011   ai = ia[2];
1012   aj = ia[3];
1013   ak = ia[4];
1014   al = ia[5];
1015
1016   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1017   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1018   slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1019   /* 9 flops */
1020
1021   /* xix goes from i to point x on the plane jkl */
1022   for(m=0; m<DIM; m++)
1023     xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1024   /* 12 flops */
1025
1026   invl=gmx_invsqrt(iprod(xix,xix));
1027   d=c*invl;
1028   /* 4 + ?10? flops */
1029
1030   copy_rvec(f[av],fv);
1031
1032   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1033
1034   for(m=0; m<DIM; m++)
1035     temp[m] = d*(fv[m] - fproj*xix[m]);
1036   /* 16 */
1037
1038   /* c is already calculated in constr_vsite3FD
1039      storing c somewhere will save 35 flops!     */
1040
1041   a1 = 1 - a - b;
1042   for(m=0; m<DIM; m++) {
1043     f[ai][m] += fv[m] - temp[m];
1044     f[aj][m] += a1*temp[m];
1045     f[ak][m] += a*temp[m];
1046     f[al][m] += b*temp[m];
1047   }
1048   /* 26 Flops */
1049
1050   if (g) {
1051     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1052     svi = IVEC2IS(di);
1053     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1054     sji = IVEC2IS(di);
1055     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1056     skj = IVEC2IS(di);
1057     ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1058     slj = IVEC2IS(di);
1059   } else if (pbc) {
1060     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1061   } else {
1062     svi = CENTRAL;
1063   }
1064
1065   if (fshift &&
1066       (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1067     rvec_dec(fshift[svi],fv);
1068     for(m=0; m<DIM; m++) {
1069       fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1070       fshift[    sji][m] += temp[m];
1071       fshift[    skj][m] += a*temp[m];
1072       fshift[    slj][m] += b*temp[m];
1073     }
1074   }
1075
1076     if (VirCorr)
1077     {
1078         rvec xiv;
1079         int  i,j;
1080
1081         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1082
1083         for(i=0; i<DIM; i++)
1084         {
1085             for(j=0; j<DIM; j++)
1086             {
1087                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1088             }
1089         }
1090     }
1091
1092   /* TOTAL: 77 flops */
1093 }
1094
1095
1096 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1097                              rvec x[],rvec f[],rvec fshift[],
1098                              gmx_bool VirCorr,matrix dxdf,
1099                              t_pbc *pbc,t_graph *g)
1100 {
1101     rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1102     rvec fv,fj,fk,fl;
1103     real invrm,denom;
1104     real cfx,cfy,cfz;
1105     ivec di;
1106     int  av,ai,aj,ak,al;
1107     int  svi,sij,sik,sil;
1108
1109     /* DEBUG: check atom indices */
1110     av = ia[1];
1111     ai = ia[2];
1112     aj = ia[3];
1113     ak = ia[4];
1114     al = ia[5];
1115
1116     copy_rvec(f[av],fv);
1117
1118     sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1119     sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1120     sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1121     /* 9 flops */
1122
1123     ra[XX] = a*xik[XX];
1124     ra[YY] = a*xik[YY];
1125     ra[ZZ] = a*xik[ZZ];
1126
1127     rb[XX] = b*xil[XX];
1128     rb[YY] = b*xil[YY];
1129     rb[ZZ] = b*xil[ZZ];
1130
1131     /* 6 flops */
1132
1133     rvec_sub(ra,xij,rja);
1134     rvec_sub(rb,xij,rjb);
1135     rvec_sub(rb,ra,rab);
1136     /* 9 flops */
1137
1138     cprod(rja,rjb,rm);
1139     /* 9 flops */
1140
1141     invrm=gmx_invsqrt(norm2(rm));
1142     denom=invrm*invrm;
1143     /* 5+5+2 flops */
1144
1145     cfx = c*invrm*fv[XX];
1146     cfy = c*invrm*fv[YY];
1147     cfz = c*invrm*fv[ZZ];
1148     /* 6 Flops */
1149
1150     cprod(rm,rab,rt);
1151     /* 9 flops */
1152
1153     rt[XX] *= denom;
1154     rt[YY] *= denom;
1155     rt[ZZ] *= denom;
1156     /* 3flops */
1157
1158     fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1159     fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1160     fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
1161     /* 30 flops */
1162
1163     cprod(rjb,rm,rt);
1164     /* 9 flops */
1165
1166     rt[XX] *= denom*a;
1167     rt[YY] *= denom*a;
1168     rt[ZZ] *= denom*a;
1169     /* 3flops */
1170
1171     fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1172     fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1173     fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1174     /* 36 flops */
1175
1176     cprod(rm,rja,rt);
1177     /* 9 flops */
1178
1179     rt[XX] *= denom*b;
1180     rt[YY] *= denom*b;
1181     rt[ZZ] *= denom*b;
1182     /* 3flops */
1183
1184     fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1185     fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1186     fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1187     /* 36 flops */
1188
1189     f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1190     f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1191     f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1192     rvec_inc(f[aj],fj);
1193     rvec_inc(f[ak],fk);
1194     rvec_inc(f[al],fl);
1195     /* 21 flops */
1196
1197     if (g) {
1198         ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1199         svi = IVEC2IS(di);
1200         ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1201         sij = IVEC2IS(di);
1202         ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1203         sik = IVEC2IS(di);
1204         ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1205         sil = IVEC2IS(di);
1206     } else if (pbc) {
1207         svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1208     } else {
1209         svi = CENTRAL;
1210     }
1211
1212     if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1213         rvec_dec(fshift[svi],fv);
1214         fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1215         fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1216         fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1217         rvec_inc(fshift[sij],fj);
1218         rvec_inc(fshift[sik],fk);
1219         rvec_inc(fshift[sil],fl);
1220     }
1221
1222     if (VirCorr)
1223     {
1224         rvec xiv;
1225         int  i,j;
1226
1227         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1228
1229         for(i=0; i<DIM; i++)
1230         {
1231             for(j=0; j<DIM; j++)
1232             {
1233                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1234             }
1235         }
1236     }
1237
1238     /* Total: 207 flops (Yuck!) */
1239 }
1240
1241
1242 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1243                          rvec x[],rvec f[],rvec fshift[],
1244                          t_pbc *pbc,t_graph *g)
1245 {
1246   rvec xv,dx,fi;
1247   int  n3,av,i,ai;
1248   real a;
1249   ivec di;
1250   int  siv;
1251
1252   n3 = 3*ip[ia[0]].vsiten.n;
1253   av = ia[1];
1254   copy_rvec(x[av],xv);
1255
1256   for(i=0; i<n3; i+=3) {
1257     ai = ia[i+2];
1258     if (g) {
1259       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1260       siv = IVEC2IS(di);
1261     } else if (pbc) {
1262       siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1263     } else {
1264       siv = CENTRAL;
1265     }
1266     a = ip[ia[i]].vsiten.a;
1267     svmul(a,f[av],fi);
1268     rvec_inc(f[ai],fi);
1269     if (fshift && siv != CENTRAL) {
1270       rvec_inc(fshift[siv],fi);
1271       rvec_dec(fshift[CENTRAL],fi);
1272     }
1273     /* 6 Flops */
1274   }
1275
1276   return n3;
1277 }
1278
1279
1280 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1281                     rvec x[],rvec f[],rvec *fshift,
1282                     gmx_bool VirCorr,matrix vir,
1283                     t_nrnb *nrnb,t_idef *idef,
1284                     int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1285                     t_commrec *cr)
1286 {
1287   real      a1,b1,c1;
1288   int       i,inc,m,nra,nr,tp,ftype;
1289   int       nd2,nd3,nd3FD,nd3FAD,nd3OUT,nd4FD,nd4FDN,ndN;
1290   t_iatom   *ia;
1291   t_iparams *ip;
1292   t_pbc     pbc,*pbc_null,*pbc_null2;
1293   int       *vsite_pbc;
1294   matrix    dxdf;
1295
1296   /* We only need to do pbc when we have inter-cg vsites */
1297   if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite) {
1298     /* This is wasting some CPU time as we now do this multiple times
1299      * per MD step. But how often do we have vsites with full pbc?
1300      */
1301     pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1302   } else {
1303     pbc_null = NULL;
1304   }
1305
1306   if (DOMAINDECOMP(cr))
1307   {
1308     dd_clear_f_vsites(cr->dd,f);
1309   }
1310   else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1311   {
1312     pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1313   }
1314
1315     if (vir != NULL)
1316     {
1317         clear_mat(dxdf);
1318     }
1319
1320   ip     = idef->iparams;
1321
1322   nd2        = 0;
1323   nd3        = 0;
1324   nd3FD      = 0;
1325   nd3FAD     = 0;
1326   nd3OUT     = 0;
1327   nd4FD      = 0;
1328   nd4FDN     = 0;
1329   ndN        = 0;
1330
1331   /* this loop goes backwards to be able to build *
1332    * higher type vsites from lower types         */
1333   pbc_null2 = NULL;
1334   for(ftype=F_NRE-1; (ftype>=0); ftype--) {
1335     if (interaction_function[ftype].flags & IF_VSITE) {
1336       nra    = interaction_function[ftype].nratoms;
1337       nr     = idef->il[ftype].nr;
1338       ia     = idef->il[ftype].iatoms;
1339
1340       if (pbc_null) {
1341         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1342       } else {
1343         vsite_pbc = NULL;
1344       }
1345
1346       for(i=0; (i<nr); ) {
1347         /* Check if we need to apply pbc for this vsite */
1348         if (vsite_pbc) {
1349           if (vsite_pbc[i/(1+nra)] > -2)
1350             pbc_null2 = pbc_null;
1351           else
1352             pbc_null2 = NULL;
1353         }
1354
1355         tp   = ia[0];
1356         if (ftype != idef->functype[tp])
1357           gmx_incons("Functiontypes for vsites wrong");
1358
1359         /* Constants for constructing */
1360         a1   = ip[tp].vsite.a;
1361         /* Construct the vsite depending on type */
1362         inc = nra+1;
1363         switch (ftype) {
1364         case F_VSITE2:
1365           spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1366           nd2++;
1367           break;
1368         case F_VSITE3:
1369           b1 = ip[tp].vsite.b;
1370           spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1371           nd3++;
1372           break;
1373         case F_VSITE3FD:
1374           b1 = ip[tp].vsite.b;
1375           spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1376           nd3FD++;
1377           break;
1378         case F_VSITE3FAD:
1379           b1 = ip[tp].vsite.b;
1380           spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1381           nd3FAD++;
1382           break;
1383         case F_VSITE3OUT:
1384           b1 = ip[tp].vsite.b;
1385           c1 = ip[tp].vsite.c;
1386           spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1387           nd3OUT++;
1388           break;
1389         case F_VSITE4FD:
1390           b1 = ip[tp].vsite.b;
1391           c1 = ip[tp].vsite.c;
1392           spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1393           nd4FD++;
1394           break;
1395         case F_VSITE4FDN:
1396           b1 = ip[tp].vsite.b;
1397           c1 = ip[tp].vsite.c;
1398           spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1399           nd4FDN++;
1400           break;
1401         case F_VSITEN:
1402           inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1403           ndN += inc;
1404           break;
1405         default:
1406           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1407                       ftype,__FILE__,__LINE__);
1408         }
1409         clear_rvec(f[ia[1]]);
1410
1411         /* Increment loop variables */
1412         i  += inc;
1413         ia += inc;
1414       }
1415     }
1416   }
1417
1418     if (VirCorr)
1419     {
1420         int i,j;
1421
1422         for(i=0; i<DIM; i++)
1423         {
1424             for(j=0; j<DIM; j++)
1425             {
1426                 vir[i][j] += -0.5*dxdf[i][j];
1427             }
1428         }
1429     }
1430
1431   inc_nrnb(nrnb,eNR_VSITE2,   nd2     );
1432   inc_nrnb(nrnb,eNR_VSITE3,   nd3     );
1433   inc_nrnb(nrnb,eNR_VSITE3FD, nd3FD   );
1434   inc_nrnb(nrnb,eNR_VSITE3FAD,nd3FAD  );
1435   inc_nrnb(nrnb,eNR_VSITE3OUT,nd3OUT  );
1436   inc_nrnb(nrnb,eNR_VSITE4FD, nd4FD   );
1437   inc_nrnb(nrnb,eNR_VSITE4FDN,nd4FDN  );
1438   inc_nrnb(nrnb,eNR_VSITEN,   ndN     );
1439
1440   if (DOMAINDECOMP(cr)) {
1441     dd_move_f_vsites(cr->dd,f,fshift);
1442   } else if (vsite->bPDvsitecomm) {
1443     /* We only move forces here, and they are independent of shifts */
1444     move_construct_f(vsite->vsitecomm,f,cr);
1445   }
1446 }
1447
1448 static int *atom2cg(t_block *cgs)
1449 {
1450   int *a2cg,cg,i;
1451
1452   snew(a2cg,cgs->index[cgs->nr]);
1453   for(cg=0; cg<cgs->nr; cg++) {
1454     for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1455       a2cg[i] = cg;
1456   }
1457
1458   return a2cg;
1459 }
1460
1461 static int count_intercg_vsite(gmx_mtop_t *mtop)
1462 {
1463   int  mb,mt,ftype,nral,i,cg,a;
1464   gmx_molblock_t *molb;
1465   gmx_moltype_t *molt;
1466   int  *a2cg;
1467   t_ilist *il;
1468   t_iatom *ia;
1469   int  n_intercg_vsite;
1470
1471   n_intercg_vsite = 0;
1472   for(mb=0; mb<mtop->nmolblock; mb++) {
1473     molb = &mtop->molblock[mb];
1474     molt = &mtop->moltype[molb->type];
1475     a2cg = atom2cg(&molt->cgs);
1476     for(ftype=0; ftype<F_NRE; ftype++) {
1477       if (interaction_function[ftype].flags & IF_VSITE) {
1478         nral = NRAL(ftype);
1479         il = &molt->ilist[ftype];
1480         ia  = il->iatoms;
1481         for(i=0; i<il->nr; i+=1+nral) {
1482           cg = a2cg[ia[1+i]];
1483           for(a=1; a<nral; a++) {
1484             if (a2cg[ia[1+a]] != cg) {
1485               n_intercg_vsite += molb->nmol;
1486               break;
1487             }
1488           }
1489         }
1490       }
1491     }
1492     sfree(a2cg);
1493   }
1494
1495   return n_intercg_vsite;
1496 }
1497
1498 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1499                            t_atom *atom,t_mdatoms *md,
1500                            t_block *cgs,int *a2cg)
1501 {
1502   int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1503   t_ilist *il;
1504   t_iatom *ia;
1505   int  **vsite_pbc,*vsite_pbc_f;
1506   char *pbc_set;
1507   gmx_bool bViteOnlyCG_and_FirstAtom;
1508
1509   /* Make an array that tells if the pbc of an atom is set */
1510   snew(pbc_set,cgs->index[cgs->nr]);
1511   /* PBC is set for all non vsites */
1512   for(a=0; a<cgs->index[cgs->nr]; a++) {
1513     if ((atom && atom[a].ptype != eptVSite) ||
1514         (md   && md->ptype[a]  != eptVSite)) {
1515       pbc_set[a] = 1;
1516     }
1517   }
1518
1519   snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1520
1521   for(ftype=0; ftype<F_NRE; ftype++) {
1522     if (interaction_function[ftype].flags & IF_VSITE) {
1523       nral = NRAL(ftype);
1524       il = &ilist[ftype];
1525       ia  = il->iatoms;
1526
1527       snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1528       vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1529
1530       i = 0;
1531       while (i < il->nr) {
1532         vsi = i/(1+nral);
1533         vsite = ia[i+1];
1534         cg_v = a2cg[vsite];
1535         /* A value of -2 signals that this vsite and its contructing
1536          * atoms are all within the same cg, so no pbc is required.
1537          */
1538         vsite_pbc_f[vsi] = -2;
1539         /* Check if constructing atoms are outside the vsite's cg */
1540         nc3 = 0;
1541         if (ftype == F_VSITEN) {
1542           nc3 = 3*iparams[ia[i]].vsiten.n;
1543           for(j=0; j<nc3; j+=3) {
1544             if (a2cg[ia[i+j+2]] != cg_v)
1545               vsite_pbc_f[vsi] = -1;
1546           }
1547         } else {
1548           for(a=1; a<nral; a++) {
1549             if (a2cg[ia[i+1+a]] != cg_v)
1550               vsite_pbc_f[vsi] = -1;
1551           }
1552         }
1553         if (vsite_pbc_f[vsi] == -1) {
1554           /* Check if this is the first processed atom of a vsite only cg */
1555           bViteOnlyCG_and_FirstAtom = TRUE;
1556           for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1557             /* Non-vsites already have pbc set, so simply check for pbc_set */
1558             if (pbc_set[a]) {
1559               bViteOnlyCG_and_FirstAtom = FALSE;
1560               break;
1561             }
1562           }
1563           if (bViteOnlyCG_and_FirstAtom) {
1564             /* First processed atom of a vsite only charge group.
1565              * The pbc of the input coordinates to construct_vsites
1566              * should be preserved.
1567              */
1568             vsite_pbc_f[vsi] = vsite;
1569           } else if (cg_v != a2cg[ia[1+i+1]]) {
1570             /* This vsite has a different charge group index
1571              * than it's first constructing atom
1572              * and the charge group has more than one atom,
1573              * search for the first normal particle
1574              * or vsite that already had its pbc defined.
1575              * If nothing is found, use full pbc for this vsite.
1576              */
1577             for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1578               if (a != vsite && pbc_set[a]) {
1579                 vsite_pbc_f[vsi] = a;
1580                 if (gmx_debug_at)
1581                   fprintf(debug,"vsite %d match pbc with atom %d\n",
1582                           vsite+1,a+1);
1583                 break;
1584               }
1585             }
1586             if (gmx_debug_at)
1587               fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
1588                       vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1589                       vsite_pbc_f[vsi]+1);
1590           }
1591         }
1592         if (ftype == F_VSITEN) {
1593           /* The other entries in vsite_pbc_f are not used for center vsites */
1594           i += nc3;
1595         } else {
1596           i += 1+nral;
1597         }
1598
1599         /* This vsite now has its pbc defined */
1600         pbc_set[vsite] = 1;
1601       }
1602     }
1603   }
1604
1605   sfree(pbc_set);
1606
1607   return vsite_pbc;
1608 }
1609
1610 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr)
1611 {
1612   int nvsite,i;
1613   int *a2cg,cg;
1614   gmx_vsite_t *vsite;
1615   int mt;
1616   gmx_moltype_t *molt;
1617
1618   /* check if there are vsites */
1619   nvsite = 0;
1620   for(i=0; i<F_NRE; i++) {
1621     if (interaction_function[i].flags & IF_VSITE) {
1622       nvsite += gmx_mtop_ftype_count(mtop,i);
1623     }
1624   }
1625
1626   if (nvsite == 0) {
1627     return NULL;
1628   }
1629
1630   snew(vsite,1);
1631
1632   vsite->n_intercg_vsite = count_intercg_vsite(mtop);
1633
1634   if (vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr)) {
1635     vsite->nvsite_pbc_molt = mtop->nmoltype;
1636     snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1637     for(mt=0; mt<mtop->nmoltype; mt++) {
1638       molt = &mtop->moltype[mt];
1639       /* Make an atom to charge group index */
1640       a2cg = atom2cg(&molt->cgs);
1641       vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1642                                                 molt->ilist,
1643                                                 molt->atoms.atom,NULL,
1644                                                 &molt->cgs,a2cg);
1645       sfree(a2cg);
1646     }
1647
1648     snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1649     snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
1650   }
1651
1652
1653   return vsite;
1654 }
1655
1656 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
1657                    t_commrec *cr)
1658 {
1659   int *a2cg;
1660
1661   /* Make an atom to charge group index */
1662   a2cg = atom2cg(&top->cgs);
1663
1664   if (vsite->n_intercg_vsite > 0) {
1665     vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
1666                                          top->idef.il,NULL,md,
1667                                          &top->cgs,a2cg);
1668
1669     if (PARTDECOMP(cr)) {
1670       snew(vsite->vsitecomm,1);
1671                 vsite->bPDvsitecomm =
1672                 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
1673     }
1674   }
1675
1676   sfree(a2cg);
1677 }