src/mdlib/clincs.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15  *
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 /* This file is completely threadsafe - keep it that way! */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "main.h"
  43 #include "constr.h"
  44 #include "copyrite.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "pbc.h"
  48 #include "smalloc.h"
  49 #include "mdrun.h"
  50 #include "nrnb.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmxfio.h"
  55 #include "gmx_omp_nthreads.h"
  56
  57 typedef struct {
  58     int b0;           /* first constraint for this thread */
  59     int b1;           /* b1-1 is the last constraint for this thread */
  60     int nind;         /* number of indices */
  61     int *ind;         /* constraint index for updating atom data */
  62     int nind_r;       /* number of indices */
  63     int *ind_r;       /* constraint index for updating atom data */
  64     int ind_nalloc;   /* allocation size of ind and ind_r */
  65 } lincs_thread_t;
  66
  67 typedef struct gmx_lincsdata {
  68     int  ncg;         /* the global number of constraints */
  69     int  ncg_flex;    /* the global number of flexible constraints */
  70     int  ncg_triangle;/* the global number of constraints in triangles */
  71     int  nIter;       /* the number of iterations */
  72     int  nOrder;      /* the order of the matrix expansion */
  73     int  nc;          /* the number of constraints */
  74     int  nc_alloc;    /* the number we allocated memory for */
  75     int  ncc;         /* the number of constraint connections */
  76     int  ncc_alloc;   /* the number we allocated memory for */
  77     real matlam;      /* the FE lambda value used for filling blc and blmf */
  78     real *bllen0;     /* the reference distance in topology A */
  79     real *ddist;      /* the reference distance in top B - the r.d. in top A */
  80     int  *bla;        /* the atom pairs involved in the constraints */
  81     real *blc;        /* 1/sqrt(invmass1 + invmass2) */
  82     real *blc1;       /* as blc, but with all masses 1 */
  83     int  *blnr;       /* index into blbnb and blmf */
  84     int  *blbnb;      /* list of constraint connections */
  85     int  ntriangle;   /* the local number of constraints in triangles */
  86     int  *triangle;   /* the list of triangle constraints */
  87     int  *tri_bits;   /* the bits tell if the matrix element should be used */
  88     int  ncc_triangle;/* the number of constraint connections in triangles */
  89     real *blmf;       /* matrix of mass factors for constraint connections */
  90     real *blmf1;      /* as blmf, but with all masses 1 */
  91     real *bllen;      /* the reference bond length */
  92     int  nth;         /* The number of threads doing LINCS */
  93     lincs_thread_t *th; /* LINCS thread division */
  94     unsigned *atf;    /* atom flags for thread parallelization */
  95     int  atf_nalloc;  /* allocation size of atf */
  96     /* arrays for temporary storage in the LINCS algorithm */
  97     rvec *tmpv;
  98     real *tmpncc;
  99     real *tmp1;
 100     real *tmp2;
 101     real *tmp3;
 102     real *tmp4;
 103     real *mlambda;  /* the Lagrange multipliers * -1 */
 104     /* storage for the constraint RMS relative deviation output */
 105     real rmsd_data[3];
 106 } t_gmx_lincsdata;
 107
 108 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 109 {
 110     return lincsd->rmsd_data;
 111 }
 112
 113 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
 114 {
 115     if (lincsd->rmsd_data[0] > 0)
 116     {
 117         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 118     }
 119     else
 120     {
 121         return 0;
 122     }
 123 }
 124
 125 /* Do a set of nrec LINCS matrix multiplications.
 126  * This function will return with up to date thread-local
 127  * constraint data, without an OpenMP barrier.
 128  */
 129 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 130                                 int b0,int b1,
 131                                 const real *blcc,
 132                                 real *rhs1,real *rhs2,real *sol)
 133 {
 134     int  nrec,rec,b,j,n,nr0,nr1;
 135     real mvb,*swap;
 136     int  ntriangle,tb,bits;
 137     const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
 138     const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
 139
 140     ntriangle = lincsd->ntriangle;
 141     nrec      = lincsd->nOrder;
 142
 143     for(rec=0; rec<nrec; rec++)
 144     {
 145 #pragma omp barrier
 146         for(b=b0; b<b1; b++)
 147         {
 148             mvb = 0;
 149             for(n=blnr[b]; n<blnr[b+1]; n++)
 150             {
 151                 j = blbnb[n];
 152                 mvb = mvb + blcc[n]*rhs1[j];
 153             }
 154             rhs2[b] = mvb;
 155             sol[b]  = sol[b] + mvb;
 156         }
 157         swap = rhs1;
 158         rhs1 = rhs2;
 159         rhs2 = swap;
 160     } /* nrec*(ncons+2*nrtot) flops */
 161
 162     if (ntriangle > 0)
 163     {
 164         /* Perform an extra nrec recursions for only the constraints
 165          * involved in rigid triangles.
 166          * In this way their accuracy should come close to those of the other
 167          * constraints, since traingles of constraints can produce eigenvalues
 168          * around 0.7, while the effective eigenvalue for bond constraints
 169          * is around 0.4 (and 0.7*0.7=0.5).
 170          */
 171         /* We need to copy the temporary array, since only the elements
 172          * for constraints involved in triangles are updated and then
 173          * the pointers are swapped. This saving copying the whole arrary.
 174          * We need barrier as other threads might still be reading from rhs2.
 175          */
 176 #pragma omp barrier
 177         for(b=b0; b<b1; b++)
 178         {
 179             rhs2[b] = rhs1[b];
 180         }
 181 #pragma omp barrier
 182 #pragma omp master
 183         {
 184             for(rec=0; rec<nrec; rec++)
 185             {
 186                 for(tb=0; tb<ntriangle; tb++)
 187                 {
 188                     b    = triangle[tb];
 189                     bits = tri_bits[tb];
 190                     mvb = 0;
 191                     nr0 = blnr[b];
 192                     nr1 = blnr[b+1];
 193                     for(n=nr0; n<nr1; n++)
 194                     {
 195                         if (bits & (1<<(n-nr0)))
 196                         {
 197                             j = blbnb[n];
 198                             mvb = mvb + blcc[n]*rhs1[j];
 199                         }
 200                     }
 201                     rhs2[b] = mvb;
 202                     sol[b]  = sol[b] + mvb;
 203                 }
 204                 swap = rhs1;
 205                 rhs1 = rhs2;
 206                 rhs2 = swap;
 207             }
 208         } /* flops count is missing here */
 209
 210         /* We need a barrier here as the calling routine will continue
 211          * to operate on the thread-local constraints without barrier.
 212          */
 213 #pragma omp barrier
 214     }
 215 }
 216
 217 static void lincs_update_atoms_noind(int ncons,const int *bla,
 218                                      real prefac,
 219                                      const real *fac,rvec *r,
 220                                      const real *invmass,
 221                                      rvec *x)
 222 {
 223     int  b,i,j;
 224     real mvb,im1,im2,tmp0,tmp1,tmp2;
 225
 226     for(b=0; b<ncons; b++)
 227     {
 228         i = bla[2*b];
 229         j = bla[2*b+1];
 230         mvb = prefac*fac[b];
 231         im1 = invmass[i];
 232         im2 = invmass[j];
 233         tmp0 = r[b][0]*mvb;
 234         tmp1 = r[b][1]*mvb;
 235         tmp2 = r[b][2]*mvb;
 236         x[i][0] -= tmp0*im1;
 237         x[i][1] -= tmp1*im1;
 238         x[i][2] -= tmp2*im1;
 239         x[j][0] += tmp0*im2;
 240         x[j][1] += tmp1*im2;
 241         x[j][2] += tmp2*im2;
 242     } /* 16 ncons flops */
 243 }
 244
 245 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
 246                                    real prefac,
 247                                    const real *fac,rvec *r,
 248                                    const real *invmass,
 249                                    rvec *x)
 250 {
 251     int  bi,b,i,j;
 252     real mvb,im1,im2,tmp0,tmp1,tmp2;
 253
 254     for(bi=0; bi<ncons; bi++)
 255     {
 256         b = ind[bi];
 257         i = bla[2*b];
 258         j = bla[2*b+1];
 259         mvb = prefac*fac[b];
 260         im1 = invmass[i];
 261         im2 = invmass[j];
 262         tmp0 = r[b][0]*mvb;
 263         tmp1 = r[b][1]*mvb;
 264         tmp2 = r[b][2]*mvb;
 265         x[i][0] -= tmp0*im1;
 266         x[i][1] -= tmp1*im1;
 267         x[i][2] -= tmp2*im1;
 268         x[j][0] += tmp0*im2;
 269         x[j][1] += tmp1*im2;
 270         x[j][2] += tmp2*im2;
 271     } /* 16 ncons flops */
 272 }
 273
 274 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
 275                                real prefac,
 276                                const real *fac,rvec *r,
 277                                const real *invmass,
 278                                rvec *x)
 279 {
 280     if (li->nth == 1)
 281     {
 282         /* Single thread, we simply update for all constraints */
 283         lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
 284     }
 285     else
 286     {
 287         /* Update the atom vector components for our thread local
 288          * constraints that only access our local atom range.
 289          * This can be done without a barrier.
 290          */
 291         lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
 292                                li->bla,prefac,fac,r,invmass,x);
 293
 294         if (li->th[li->nth].nind > 0)
 295         {
 296             /* Update the constraints that operate on atoms
 297              * in multiple thread atom blocks on the master thread.
 298              */
 299 #pragma omp barrier
 300 #pragma omp master
 301             {
 302                 lincs_update_atoms_ind(li->th[li->nth].nind,
 303                                        li->th[li->nth].ind,
 304                                        li->bla,prefac,fac,r,invmass,x);
 305             }
 306         }
 307     }
 308 }
 309
 310 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
 311                       struct gmx_lincsdata *lincsd,real *invmass,
 312                       int econq,real *dvdlambda,
 313                       gmx_bool bCalcVir,tensor rmdf)
 314 {
 315     int     b,i,j,k,n;
 316     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
 317     rvec    dx;
 318     int     ncons,*bla,*blnr,*blbnb;
 319     rvec    *r;
 320     real    *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
 321
 322     ncons  = lincsd->nc;
 323     bla    = lincsd->bla;
 324     r      = lincsd->tmpv;
 325     blnr   = lincsd->blnr;
 326     blbnb  = lincsd->blbnb;
 327     if (econq != econqForce)
 328     {
 329         /* Use mass-weighted parameters */
 330         blc  = lincsd->blc;
 331         blmf = lincsd->blmf;
 332     }
 333     else
 334     {
 335         /* Use non mass-weighted parameters */
 336         blc  = lincsd->blc1;
 337         blmf = lincsd->blmf1;
 338     }
 339     blcc   = lincsd->tmpncc;
 340     rhs1   = lincsd->tmp1;
 341     rhs2   = lincsd->tmp2;
 342     sol    = lincsd->tmp3;
 343
 344     if (econq != econqForce)
 345     {
 346         dvdlambda = NULL;
 347     }
 348
 349     /* Compute normalized i-j vectors */
 350     if (pbc)
 351     {
 352         for(b=0; b<ncons; b++)
 353         {
 354             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 355             unitv(dx,r[b]);
 356         }
 357     }
 358     else
 359     {
 360         for(b=0; b<ncons; b++)
 361         {
 362             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
 363             unitv(dx,r[b]);
 364         } /* 16 ncons flops */
 365     }
 366
 367     for(b=0; b<ncons; b++)
 368     {
 369         tmp0 = r[b][0];
 370         tmp1 = r[b][1];
 371         tmp2 = r[b][2];
 372         i = bla[2*b];
 373         j = bla[2*b+1];
 374         for(n=blnr[b]; n<blnr[b+1]; n++)
 375         {
 376             k = blbnb[n];
 377             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 378         } /* 6 nr flops */
 379         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 380                       tmp1*(f[i][1] - f[j][1]) +
 381                       tmp2*(f[i][2] - f[j][2]));
 382         rhs1[b] = mvb;
 383         sol[b]  = mvb;
 384         /* 7 flops */
 385     }
 386     /* Together: 23*ncons + 6*nrtot flops */
 387
 388     lincs_matrix_expand(lincsd,0,ncons,blcc,rhs1,rhs2,sol);
 389     /* nrec*(ncons+2*nrtot) flops */
 390
 391     if (econq != econqForce)
 392     {
 393         for(b=0; b<ncons; b++)
 394         {
 395             /* With econqDeriv_FlexCon only use the flexible constraints */
 396             if (econq != econqDeriv_FlexCon ||
 397                 (lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 398             {
 399                 i = bla[2*b];
 400                 j = bla[2*b+1];
 401                 mvb = blc[b]*sol[b];
 402                 im1 = invmass[i];
 403                 im2 = invmass[j];
 404                 tmp0 = r[b][0]*mvb;
 405                 tmp1 = r[b][1]*mvb;
 406                 tmp2 = r[b][2]*mvb;
 407                 fp[i][0] -= tmp0*im1;
 408                 fp[i][1] -= tmp1*im1;
 409                 fp[i][2] -= tmp2*im1;
 410                 fp[j][0] += tmp0*im2;
 411                 fp[j][1] += tmp1*im2;
 412                 fp[j][2] += tmp2*im2;
 413                 if (dvdlambda)
 414                 {
 415                     /* This is only correct with forces and invmass=1 */
 416                     *dvdlambda -= mvb*lincsd->ddist[b];
 417                 }
 418             }
 419         } /* 16 ncons flops */
 420     }
 421     else
 422     {
 423         for(b=0; b<ncons; b++)
 424         {
 425             i = bla[2*b];
 426             j = bla[2*b+1];
 427             mvb = blc[b]*sol[b];
 428             tmp0 = r[b][0]*mvb;
 429             tmp1 = r[b][1]*mvb;
 430             tmp2 = r[b][2]*mvb;
 431             fp[i][0] -= tmp0;
 432             fp[i][1] -= tmp1;
 433             fp[i][2] -= tmp2;
 434             fp[j][0] += tmp0;
 435             fp[j][1] += tmp1;
 436             fp[j][2] += tmp2;
 437             if (dvdlambda)
 438             {
 439                 *dvdlambda -= mvb*lincsd->ddist[b];
 440             }
 441         }
 442         /* 10 ncons flops */
 443     }
 444
 445     if (bCalcVir)
 446     {
 447         /* Constraint virial,
 448          * determines sum r_bond x delta f,
 449          * where delta f is the constraint correction
 450          * of the quantity that is being constrained.
 451          */
 452         for(b=0; b<ncons; b++)
 453         {
 454             mvb = lincsd->bllen[b]*blc[b]*sol[b];
 455             for(i=0; i<DIM; i++)
 456             {
 457                 tmp1 = mvb*r[b][i];
 458                 for(j=0; j<DIM; j++)
 459                 {
 460                     rmdf[i][j] += tmp1*r[b][j];
 461                 }
 462             }
 463         } /* 23 ncons flops */
 464     }
 465 }
 466
 467 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
 468                      struct gmx_lincsdata *lincsd,int th,
 469                      real *invmass,
 470                                          t_commrec *cr,
 471                      gmx_bool bCalcLambda,
 472                      real wangle,int *warn,
 473                      real invdt,rvec *v,
 474                      gmx_bool bCalcVir,tensor rmdr)
 475 {
 476     int     b0,b1,b,i,j,k,n,iter;
 477     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
 478     rvec    dx;
 479     int     ncons,*bla,*blnr,*blbnb;
 480     rvec    *r;
 481     real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
 482     int     *nlocat;
 483
 484     b0 = lincsd->th[th].b0;
 485     b1 = lincsd->th[th].b1;
 486
 487     ncons  = lincsd->nc;
 488     bla    = lincsd->bla;
 489     r      = lincsd->tmpv;
 490     blnr   = lincsd->blnr;
 491     blbnb  = lincsd->blbnb;
 492     blc    = lincsd->blc;
 493     blmf   = lincsd->blmf;
 494     bllen  = lincsd->bllen;
 495     blcc   = lincsd->tmpncc;
 496     rhs1   = lincsd->tmp1;
 497     rhs2   = lincsd->tmp2;
 498     sol    = lincsd->tmp3;
 499     blc_sol= lincsd->tmp4;
 500     mlambda= lincsd->mlambda;
 501
 502     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 503     {
 504         nlocat = dd_constraints_nlocalatoms(cr->dd);
 505     }
 506     else if (PARTDECOMP(cr))
 507     {
 508         nlocat = pd_constraints_nlocalatoms(cr->pd);
 509     }
 510     else
 511     {
 512         nlocat = NULL;
 513     }
 514
 515     *warn = 0;
 516
 517     if (pbc)
 518     {
 519         /* Compute normalized i-j vectors */
 520         for(b=b0; b<b1; b++)
 521         {
 522             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 523             unitv(dx,r[b]);
 524         }
 525 #pragma omp barrier
 526         for(b=b0; b<b1; b++)
 527         {
 528             for(n=blnr[b]; n<blnr[b+1]; n++)
 529             {
 530                 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
 531             }
 532             pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 533             mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
 534             rhs1[b] = mvb;
 535             sol[b]  = mvb;
 536         }
 537     }
 538     else
 539     {
 540         /* Compute normalized i-j vectors */
 541         for(b=b0; b<b1; b++)
 542         {
 543             i = bla[2*b];
 544             j = bla[2*b+1];
 545             tmp0 = x[i][0] - x[j][0];
 546             tmp1 = x[i][1] - x[j][1];
 547             tmp2 = x[i][2] - x[j][2];
 548             rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 549             r[b][0] = rlen*tmp0;
 550             r[b][1] = rlen*tmp1;
 551             r[b][2] = rlen*tmp2;
 552         } /* 16 ncons flops */
 553
 554 #pragma omp barrier
 555         for(b=b0; b<b1; b++)
 556         {
 557             tmp0 = r[b][0];
 558             tmp1 = r[b][1];
 559             tmp2 = r[b][2];
 560             len = bllen[b];
 561             i = bla[2*b];
 562             j = bla[2*b+1];
 563             for(n=blnr[b]; n<blnr[b+1]; n++)
 564             {
 565                 k = blbnb[n];
 566                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 567             } /* 6 nr flops */
 568             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 569                           tmp1*(xp[i][1] - xp[j][1]) +
 570                           tmp2*(xp[i][2] - xp[j][2]) - len);
 571             rhs1[b] = mvb;
 572             sol[b]  = mvb;
 573             /* 10 flops */
 574         }
 575         /* Together: 26*ncons + 6*nrtot flops */
 576     }
 577
 578     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 579     /* nrec*(ncons+2*nrtot) flops */
 580
 581     for(b=b0; b<b1; b++)
 582     {
 583         mlambda[b] = blc[b]*sol[b];
 584     }
 585
 586     /* Update the coordinates */
 587     lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
 588
 589     /*
 590      ********  Correction for centripetal effects  ********
 591      */
 592
 593     wfac = cos(DEG2RAD*wangle);
 594     wfac = wfac*wfac;
 595
 596     for(iter=0; iter<lincsd->nIter; iter++)
 597     {
 598         if ((DOMAINDECOMP(cr) && cr->dd->constraints) ||
 599             PARTDECOMP(cr))
 600         {
 601 #pragma omp barrier
 602 #pragma omp master
 603             {
 604                  /* Communicate the corrected non-local coordinates */
 605                 if (DOMAINDECOMP(cr))
 606                 {
 607                     dd_move_x_constraints(cr->dd,box,xp,NULL);
 608                 }
 609                 else
 610                 {
 611                     pd_move_x_constraints(cr,xp,NULL);
 612                 }
 613             }
 614         }
 615
 616 #pragma omp barrier
 617         for(b=b0; b<b1; b++)
 618         {
 619             len = bllen[b];
 620             if (pbc)
 621             {
 622                 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 623             }
 624             else
 625             {
 626                 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
 627             }
 628             len2 = len*len;
 629             dlen2 = 2*len2 - norm2(dx);
 630             if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
 631             {
 632                 *warn = b;
 633             }
 634             if (dlen2 > 0)
 635             {
 636                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 637             }
 638             else
 639             {
 640                 mvb = blc[b]*len;
 641             }
 642             rhs1[b] = mvb;
 643             sol[b]  = mvb;
 644         } /* 20*ncons flops */
 645
 646         lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 647         /* nrec*(ncons+2*nrtot) flops */
 648
 649         for(b=b0; b<b1; b++)
 650         {
 651             mvb = blc[b]*sol[b];
 652             blc_sol[b]  = mvb;
 653             mlambda[b] += mvb;
 654         }
 655
 656         /* Update the coordinates */
 657         lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
 658     }
 659     /* nit*ncons*(37+9*nrec) flops */
 660
 661     if (v != NULL)
 662     {
 663         /* Update the velocities */
 664         lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
 665         /* 16 ncons flops */
 666     }
 667
 668     if (nlocat && bCalcLambda)
 669     {
 670         /* Only account for local atoms */
 671         for(b=b0; b<b1; b++)
 672         {
 673             mlambda[b] *= 0.5*nlocat[b];
 674         }
 675 #pragma omp barrier
 676     }
 677
 678     if (bCalcVir)
 679     {
 680 #pragma omp master
 681         {
 682             /* Constraint virial */
 683             for(b=0; b<ncons; b++)
 684             {
 685                 tmp0 = -bllen[b]*mlambda[b];
 686                 for(i=0; i<DIM; i++)
 687                 {
 688                     tmp1 = tmp0*r[b][i];
 689                     for(j=0; j<DIM; j++)
 690                     {
 691                         rmdr[i][j] -= tmp1*r[b][j];
 692                     }
 693                 }
 694             } /* 22 ncons flops */
 695         }
 696     }
 697
 698     /* Total:
 699      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 700      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 701      *
 702      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 703      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 704      * if nit=1
 705      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 706      */
 707 }
 708
 709 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
 710 {
 711     int i,a1,a2,n,k,sign,center;
 712     int end,nk,kk;
 713     const real invsqrt2=0.7071067811865475244;
 714
 715     for(i=0; (i<li->nc); i++)
 716     {
 717         a1 = li->bla[2*i];
 718         a2 = li->bla[2*i+1];
 719         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 720         li->blc1[i] = invsqrt2;
 721     }
 722
 723     /* Construct the coupling coefficient matrix blmf */
 724     li->ntriangle = 0;
 725     li->ncc_triangle = 0;
 726     for(i=0; (i<li->nc); i++)
 727     {
 728         a1 = li->bla[2*i];
 729         a2 = li->bla[2*i+1];
 730         for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
 731         {
 732             k = li->blbnb[n];
 733             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 734             {
 735                 sign = -1;
 736             }
 737             else
 738             {
 739                 sign = 1;
 740             }
 741             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 742             {
 743                 center = a1;
 744                 end    = a2;
 745             }
 746             else
 747             {
 748                 center = a2;
 749                 end    = a1;
 750             }
 751             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 752             li->blmf1[n] = sign*0.5;
 753             if (li->ncg_triangle > 0)
 754             {
 755                 /* Look for constraint triangles */
 756                 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
 757                 {
 758                     kk = li->blbnb[nk];
 759                     if (kk != i && kk != k &&
 760                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 761                     {
 762                         if (li->ntriangle == 0 ||
 763                             li->triangle[li->ntriangle-1] < i)
 764                         {
 765                             /* Add this constraint to the triangle list */
 766                             li->triangle[li->ntriangle] = i;
 767                             li->tri_bits[li->ntriangle] = 0;
 768                             li->ntriangle++;
 769                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 770                             {
 771                                 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 772                                           li->blnr[i+1] - li->blnr[i],
 773                                           sizeof(li->tri_bits[0])*8-1);
 774                             }
 775                         }
 776                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 777                         li->ncc_triangle++;
 778                     }
 779                 }
 780             }
 781         }
 782     }
 783
 784     if (debug)
 785     {
 786         fprintf(debug,"Of the %d constraints %d participate in triangles\n",
 787                 li->nc,li->ntriangle);
 788         fprintf(debug,"There are %d couplings of which %d in triangles\n",
 789                 li->ncc,li->ncc_triangle);
 790     }
 791
 792     /* Set matlam,
 793      * so we know with which lambda value the masses have been set.
 794      */
 795     li->matlam = lambda;
 796 }
 797
 798 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
 799 {
 800     int  ncon1,ncon_tot;
 801     int  c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
 802     int  ncon_triangle;
 803     gmx_bool bTriangle;
 804     t_iatom *ia1,*ia2,*iap;
 805
 806     ncon1    = ilist[F_CONSTR].nr/3;
 807     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 808
 809     ia1 = ilist[F_CONSTR].iatoms;
 810     ia2 = ilist[F_CONSTRNC].iatoms;
 811
 812     ncon_triangle = 0;
 813     for(c0=0; c0<ncon_tot; c0++)
 814     {
 815         bTriangle = FALSE;
 816         iap = constr_iatomptr(ncon1,ia1,ia2,c0);
 817         a00 = iap[1];
 818         a01 = iap[2];
 819         for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
 820         {
 821             c1 = at2con->a[n1];
 822             if (c1 != c0)
 823             {
 824                 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
 825                 a10 = iap[1];
 826                 a11 = iap[2];
 827                 if (a10 == a01)
 828                 {
 829                     ac1 = a11;
 830                 }
 831                 else
 832                 {
 833                     ac1 = a10;
 834                 }
 835                 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
 836                 {
 837                     c2 = at2con->a[n2];
 838                     if (c2 != c0 && c2 != c1)
 839                     {
 840                         iap = constr_iatomptr(ncon1,ia1,ia2,c2);
 841                         a20 = iap[1];
 842                         a21 = iap[2];
 843                         if (a20 == a00 || a21 == a00)
 844                         {
 845                             bTriangle = TRUE;
 846                         }
 847                     }
 848                 }
 849             }
 850         }
 851         if (bTriangle)
 852         {
 853             ncon_triangle++;
 854         }
 855     }
 856
 857     return ncon_triangle;
 858 }
 859
 860 static int int_comp(const void *a,const void *b)
 861 {
 862     return (*(int *)a) - (*(int *)b);
 863 }
 864
 865 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
 866                            int nflexcon_global,t_blocka *at2con,
 867                            gmx_bool bPLINCS,int nIter,int nProjOrder)
 868 {
 869     struct gmx_lincsdata *li;
 870     int mb;
 871     gmx_moltype_t *molt;
 872
 873     if (fplog)
 874     {
 875         fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
 876                 bPLINCS ? " Parallel" : "");
 877     }
 878
 879     snew(li,1);
 880
 881     li->ncg      =
 882         gmx_mtop_ftype_count(mtop,F_CONSTR) +
 883         gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 884     li->ncg_flex = nflexcon_global;
 885
 886     li->ncg_triangle = 0;
 887     for(mb=0; mb<mtop->nmolblock; mb++)
 888     {
 889         molt = &mtop->moltype[mtop->molblock[mb].type];
 890         li->ncg_triangle +=
 891             mtop->molblock[mb].nmol*
 892             count_triangle_constraints(molt->ilist,
 893                                        &at2con[mtop->molblock[mb].type]);
 894     }
 895
 896     li->nIter  = nIter;
 897     li->nOrder = nProjOrder;
 898
 899     /* LINCS can run on any number of threads.
 900      * Currently the number is fixed for the whole simulation,
 901      * but it could be set in set_lincs().
 902      */
 903     li->nth = gmx_omp_nthreads_get(emntLINCS);
 904     if (li->nth == 1)
 905     {
 906         snew(li->th,1);
 907     }
 908     else
 909     {
 910         /* Allocate an extra elements for "thread-overlap" constraints */
 911         snew(li->th,li->nth+1);
 912     }
 913     if (debug)
 914     {
 915         fprintf(debug,"LINCS: using %d threads\n",li->nth);
 916     }
 917
 918     if (bPLINCS || li->ncg_triangle > 0)
 919     {
 920         please_cite(fplog,"Hess2008a");
 921     }
 922     else
 923     {
 924         please_cite(fplog,"Hess97a");
 925     }
 926
 927     if (fplog)
 928     {
 929         fprintf(fplog,"The number of constraints is %d\n",li->ncg);
 930         if (bPLINCS)
 931         {
 932             fprintf(fplog,"There are inter charge-group constraints,\n"
 933                     "will communicate selected coordinates each lincs iteration\n");
 934         }
 935         if (li->ncg_triangle > 0)
 936         {
 937             fprintf(fplog,
 938                     "%d constraints are involved in constraint triangles,\n"
 939                     "will apply an additional matrix expansion of order %d for couplings\n"
 940                     "between constraints inside triangles\n",
 941                     li->ncg_triangle,li->nOrder);
 942         }
 943     }
 944
 945     return li;
 946 }
 947
 948 /* Sets up the work division over the threads */
 949 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
 950 {
 951     lincs_thread_t *li_m;
 952     int th;
 953     unsigned *atf;
 954     int a;
 955
 956     if (natoms > li->atf_nalloc)
 957     {
 958         li->atf_nalloc = over_alloc_large(natoms);
 959         srenew(li->atf,li->atf_nalloc);
 960     }
 961
 962     atf = li->atf;
 963     /* Clear the atom flags */
 964     for(a=0; a<natoms; a++)
 965     {
 966         atf[a] = 0;
 967     }
 968
 969     for(th=0; th<li->nth; th++)
 970     {
 971         lincs_thread_t *li_th;
 972         int b;
 973
 974         li_th = &li->th[th];
 975
 976         /* The constraints are divided equally over the threads */
 977         li_th->b0 = (li->nc* th   )/li->nth;
 978         li_th->b1 = (li->nc*(th+1))/li->nth;
 979
 980         if (th < sizeof(*atf)*8)
 981         {
 982             /* For each atom set a flag for constraints from each */
 983             for(b=li_th->b0; b<li_th->b1; b++)
 984             {
 985                 atf[li->bla[b*2]  ] |= (1U<<th);
 986                 atf[li->bla[b*2+1]] |= (1U<<th);
 987             }
 988         }
 989     }
 990
 991 #pragma omp parallel for num_threads(li->nth) schedule(static)
 992     for(th=0; th<li->nth; th++)
 993     {
 994         lincs_thread_t *li_th;
 995         unsigned mask;
 996         int b;
 997
 998         li_th = &li->th[th];
 999
1000         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1001         {
1002             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1003             srenew(li_th->ind,li_th->ind_nalloc);
1004             srenew(li_th->ind_r,li_th->ind_nalloc);
1005         }
1006
1007         if (th < sizeof(*atf)*8)
1008         {
1009             mask = (1U<<th) - 1U;
1010
1011             li_th->nind   = 0;
1012             li_th->nind_r = 0;
1013             for(b=li_th->b0; b<li_th->b1; b++)
1014             {
1015                 /* We let the constraint with the lowest thread index
1016                  * operate on atoms with constraints from multiple threads.
1017                  */
1018                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1019                     ((atf[li->bla[b*2+1]] & mask) == 0))
1020                 {
1021                     /* Add the constraint to the local atom update index */
1022                     li_th->ind[li_th->nind++] = b;
1023                 }
1024                 else
1025                 {
1026                     /* Add the constraint to the rest block */
1027                     li_th->ind_r[li_th->nind_r++] = b;
1028                 }
1029             }
1030         }
1031         else
1032         {
1033             /* We are out of bits, assign all constraints to rest */
1034             for(b=li_th->b0; b<li_th->b1; b++)
1035             {
1036                 li_th->ind_r[li_th->nind_r++] = b;
1037             }
1038         }
1039     }
1040
1041     /* We need to copy all constraints which have not be assigned
1042      * to a thread to a separate list which will be handled by one thread.
1043      */
1044     li_m = &li->th[li->nth];
1045
1046     li_m->nind = 0;
1047     for(th=0; th<li->nth; th++)
1048     {
1049         lincs_thread_t *li_th;
1050         int b;
1051
1052         li_th   = &li->th[th];
1053
1054         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1055         {
1056             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1057             srenew(li_m->ind,li_m->ind_nalloc);
1058         }
1059
1060         for(b=0; b<li_th->nind_r; b++)
1061         {
1062             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1063         }
1064
1065         if (debug)
1066         {
1067             fprintf(debug,"LINCS thread %d: %d constraints\n",
1068                     th,li_th->nind);
1069         }
1070     }
1071
1072     if (debug)
1073     {
1074         fprintf(debug,"LINCS thread r: %d constraints\n",
1075                 li_m->nind);
1076     }
1077 }
1078
1079
1080 void set_lincs(t_idef *idef,t_mdatoms *md,
1081                gmx_bool bDynamics,t_commrec *cr,
1082                struct gmx_lincsdata *li)
1083 {
1084     int      start,natoms,nflexcon;
1085     t_blocka at2con;
1086     t_iatom  *iatom;
1087     int      i,k,ncc_alloc,ni,con,nconnect,concon;
1088     int      type,a1,a2;
1089     real     lenA=0,lenB;
1090     gmx_bool     bLocal;
1091
1092     li->nc = 0;
1093     li->ncc = 0;
1094     /* Zero the thread index ranges.
1095      * Otherwise without local constraints we could return with old ranges.
1096      */
1097     for(i=0; i<li->nth; i++)
1098     {
1099         li->th[i].b0   = 0;
1100         li->th[i].b1   = 0;
1101         li->th[i].nind = 0;
1102     }
1103     if (li->nth > 1)
1104     {
1105         li->th[li->nth].nind = 0;
1106     }
1107
1108     /* This is the local topology, so there are only F_CONSTR constraints */
1109     if (idef->il[F_CONSTR].nr == 0)
1110     {
1111         /* There are no constraints,
1112          * we do not need to fill any data structures.
1113          */
1114         return;
1115     }
1116
1117     if (debug)
1118     {
1119         fprintf(debug,"Building the LINCS connectivity\n");
1120     }
1121
1122     if (DOMAINDECOMP(cr))
1123     {
1124         if (cr->dd->constraints)
1125         {
1126             dd_get_constraint_range(cr->dd,&start,&natoms);
1127         }
1128         else
1129         {
1130             natoms = cr->dd->nat_home;
1131         }
1132         start = 0;
1133     }
1134     else if(PARTDECOMP(cr))
1135         {
1136                 pd_get_constraint_range(cr->pd,&start,&natoms);
1137         }
1138         else
1139     {
1140         start  = md->start;
1141         natoms = md->homenr;
1142     }
1143     at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1144                          &nflexcon);
1145
1146
1147     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1148     {
1149         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1150         srenew(li->bllen0,li->nc_alloc);
1151         srenew(li->ddist,li->nc_alloc);
1152         srenew(li->bla,2*li->nc_alloc);
1153         srenew(li->blc,li->nc_alloc);
1154         srenew(li->blc1,li->nc_alloc);
1155         srenew(li->blnr,li->nc_alloc+1);
1156         srenew(li->bllen,li->nc_alloc);
1157         srenew(li->tmpv,li->nc_alloc);
1158         srenew(li->tmp1,li->nc_alloc);
1159         srenew(li->tmp2,li->nc_alloc);
1160         srenew(li->tmp3,li->nc_alloc);
1161         srenew(li->tmp4,li->nc_alloc);
1162         srenew(li->mlambda,li->nc_alloc);
1163         if (li->ncg_triangle > 0)
1164         {
1165             /* This is allocating too much, but it is difficult to improve */
1166             srenew(li->triangle,li->nc_alloc);
1167             srenew(li->tri_bits,li->nc_alloc);
1168         }
1169     }
1170
1171     iatom = idef->il[F_CONSTR].iatoms;
1172
1173     ncc_alloc = li->ncc_alloc;
1174     li->blnr[0] = 0;
1175
1176     ni = idef->il[F_CONSTR].nr/3;
1177
1178     con = 0;
1179     nconnect = 0;
1180     li->blnr[con] = nconnect;
1181     for(i=0; i<ni; i++)
1182     {
1183         bLocal = TRUE;
1184         type = iatom[3*i];
1185         a1   = iatom[3*i+1];
1186         a2   = iatom[3*i+2];
1187         lenA = idef->iparams[type].constr.dA;
1188         lenB = idef->iparams[type].constr.dB;
1189         /* Skip the flexible constraints when not doing dynamics */
1190         if (bDynamics || lenA!=0 || lenB!=0)
1191         {
1192             li->bllen0[con]  = lenA;
1193             li->ddist[con]   = lenB - lenA;
1194             /* Set the length to the topology A length */
1195             li->bllen[con]   = li->bllen0[con];
1196             li->bla[2*con]   = a1;
1197             li->bla[2*con+1] = a2;
1198             /* Construct the constraint connection matrix blbnb */
1199             for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1200             {
1201                 concon = at2con.a[k];
1202                 if (concon != i)
1203                 {
1204                     if (nconnect >= ncc_alloc)
1205                     {
1206                         ncc_alloc = over_alloc_small(nconnect+1);
1207                         srenew(li->blbnb,ncc_alloc);
1208                     }
1209                     li->blbnb[nconnect++] = concon;
1210                 }
1211             }
1212             for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1213             {
1214                 concon = at2con.a[k];
1215                 if (concon != i)
1216                 {
1217                     if (nconnect+1 > ncc_alloc)
1218                     {
1219                         ncc_alloc = over_alloc_small(nconnect+1);
1220                         srenew(li->blbnb,ncc_alloc);
1221                     }
1222                     li->blbnb[nconnect++] = concon;
1223                 }
1224             }
1225             li->blnr[con+1] = nconnect;
1226
1227             if (cr->dd == NULL)
1228             {
1229                 /* Order the blbnb matrix to optimize memory access */
1230                 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1231                       sizeof(li->blbnb[0]),int_comp);
1232             }
1233             /* Increase the constraint count */
1234             con++;
1235         }
1236     }
1237
1238     done_blocka(&at2con);
1239
1240     /* This is the real number of constraints,
1241      * without dynamics the flexible constraints are not present.
1242      */
1243     li->nc = con;
1244
1245     li->ncc = li->blnr[con];
1246     if (cr->dd == NULL)
1247     {
1248         /* Since the matrix is static, we can free some memory */
1249         ncc_alloc = li->ncc;
1250         srenew(li->blbnb,ncc_alloc);
1251     }
1252
1253     if (ncc_alloc > li->ncc_alloc)
1254     {
1255         li->ncc_alloc = ncc_alloc;
1256         srenew(li->blmf,li->ncc_alloc);
1257         srenew(li->blmf1,li->ncc_alloc);
1258         srenew(li->tmpncc,li->ncc_alloc);
1259     }
1260
1261     if (debug)
1262     {
1263         fprintf(debug,"Number of constraints is %d, couplings %d\n",
1264                 li->nc,li->ncc);
1265     }
1266
1267     if (li->nth == 1)
1268     {
1269         li->th[0].b0 = 0;
1270         li->th[0].b1 = li->nc;
1271     }
1272     else
1273     {
1274         lincs_thread_setup(li,md->nr);
1275     }
1276
1277     set_lincs_matrix(li,md->invmass,md->lambda);
1278 }
1279
1280 static void lincs_warning(FILE *fplog,
1281                           gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1282                           int ncons,int *bla,real *bllen,real wangle,
1283                           int maxwarn,int *warncount)
1284 {
1285     int b,i,j;
1286     rvec v0,v1;
1287     real wfac,d0,d1,cosine;
1288     char buf[STRLEN];
1289
1290     wfac=cos(DEG2RAD*wangle);
1291
1292     sprintf(buf,"bonds that rotated more than %g degrees:\n"
1293             " atom 1 atom 2  angle  previous, current, constraint length\n",
1294             wangle);
1295     fprintf(stderr,"%s",buf);
1296     if (fplog)
1297     {
1298         fprintf(fplog,"%s",buf);
1299     }
1300
1301     for(b=0;b<ncons;b++)
1302     {
1303         i = bla[2*b];
1304         j = bla[2*b+1];
1305         if (pbc)
1306         {
1307             pbc_dx_aiuc(pbc,x[i],x[j],v0);
1308             pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1309         }
1310         else
1311         {
1312             rvec_sub(x[i],x[j],v0);
1313             rvec_sub(xprime[i],xprime[j],v1);
1314         }
1315         d0 = norm(v0);
1316         d1 = norm(v1);
1317         cosine = iprod(v0,v1)/(d0*d1);
1318         if (cosine < wfac)
1319         {
1320             sprintf(buf," %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1321                     ddglatnr(dd,i),ddglatnr(dd,j),
1322                     RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1323             fprintf(stderr,"%s",buf);
1324             if (fplog)
1325             {
1326                 fprintf(fplog,"%s",buf);
1327             }
1328             if (!gmx_isfinite(d1))
1329             {
1330                 gmx_fatal(FARGS,"Bond length not finite.");
1331             }
1332
1333             (*warncount)++;
1334         }
1335     }
1336     if (*warncount > maxwarn)
1337     {
1338         too_many_constraint_warnings(econtLINCS,*warncount);
1339     }
1340 }
1341
1342 static void cconerr(gmx_domdec_t *dd,
1343                     int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1344                     real *ncons_loc,real *ssd,real *max,int *imax)
1345 {
1346     real      len,d,ma,ssd2,r2;
1347     int       *nlocat,count,b,im;
1348     rvec      dx;
1349
1350     if (dd && dd->constraints)
1351     {
1352         nlocat = dd_constraints_nlocalatoms(dd);
1353     }
1354     else
1355     {
1356         nlocat = 0;
1357     }
1358
1359     ma = 0;
1360     ssd2 = 0;
1361     im = 0;
1362     count = 0;
1363     for(b=0;b<ncons;b++)
1364     {
1365         if (pbc)
1366         {
1367             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1368         }
1369         else {
1370             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1371         }
1372         r2 = norm2(dx);
1373         len = r2*gmx_invsqrt(r2);
1374         d = fabs(len/bllen[b]-1);
1375         if (d > ma && (nlocat==NULL || nlocat[b]))
1376         {
1377             ma = d;
1378             im = b;
1379         }
1380         if (nlocat == NULL)
1381         {
1382             ssd2 += d*d;
1383             count++;
1384         }
1385         else
1386         {
1387             ssd2 += nlocat[b]*d*d;
1388             count += nlocat[b];
1389         }
1390     }
1391
1392     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1393     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1394     *max = ma;
1395     *imax = im;
1396 }
1397
1398 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1399                       t_blocka *at2con,
1400                       char *name,gmx_bool bAll,rvec *x,matrix box)
1401 {
1402     char str[STRLEN];
1403     FILE *fp;
1404     int  ac0,ac1,i;
1405
1406     dd_get_constraint_range(dd,&ac0,&ac1);
1407
1408     sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1409     fp = gmx_fio_fopen(str,"w");
1410     fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
1411             10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1412             90.0,90.0,90.0);
1413     for(i=0; i<ac1; i++)
1414     {
1415         if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1416         {
1417             fprintf(fp,"%-6s%5u  %-4.4s%3.3s %c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1418                     "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1419                     10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1420                     1.0,i<dd->nat_tot ? 0.0 : 1.0);
1421         }
1422     }
1423     if (bAll)
1424     {
1425         for(i=0; i<li->nc; i++)
1426         {
1427             fprintf(fp,"CONECT%5d%5d\n",
1428                     ddglatnr(dd,li->bla[2*i]),
1429                     ddglatnr(dd,li->bla[2*i+1]));
1430         }
1431     }
1432     gmx_fio_fclose(fp);
1433 }
1434
1435 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1436                          t_inputrec *ir,
1437                          gmx_large_int_t step,
1438                          struct gmx_lincsdata *lincsd,t_mdatoms *md,
1439                          t_commrec *cr,
1440                          rvec *x,rvec *xprime,rvec *min_proj,
1441                          matrix box,t_pbc *pbc,
1442                          real lambda,real *dvdlambda,
1443                          real invdt,rvec *v,
1444                          gmx_bool bCalcVir,tensor rmdr,
1445                          int econq,
1446                          t_nrnb *nrnb,
1447                          int maxwarn,int *warncount)
1448 {
1449     char  buf[STRLEN],buf2[22],buf3[STRLEN];
1450     int   i,warn=0,p_imax,error;
1451     real  ncons_loc,p_ssd,p_max=0;
1452     rvec  dx;
1453     gmx_bool  bOK;
1454
1455     bOK = TRUE;
1456
1457     if (lincsd->nc == 0 && cr->dd == NULL)
1458     {
1459         if (bLog || bEner)
1460         {
1461             lincsd->rmsd_data[0] = 0;
1462             if (ir->eI == eiSD2 && v == NULL)
1463             {
1464                 i = 2;
1465             }
1466             else
1467             {
1468                 i = 1;
1469             }
1470             lincsd->rmsd_data[i] = 0;
1471         }
1472
1473         return bOK;
1474     }
1475
1476     if (econq == econqCoord)
1477     {
1478         if (ir->efep != efepNO)
1479         {
1480             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1481             {
1482                 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1483             }
1484
1485             for(i=0; i<lincsd->nc; i++)
1486             {
1487                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1488             }
1489         }
1490
1491         if (lincsd->ncg_flex)
1492         {
1493             /* Set the flexible constraint lengths to the old lengths */
1494             if (pbc != NULL)
1495             {
1496                 for(i=0; i<lincsd->nc; i++)
1497                 {
1498                     if (lincsd->bllen[i] == 0) {
1499                         pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1500                         lincsd->bllen[i] = norm(dx);
1501                     }
1502                 }
1503             }
1504             else
1505             {
1506                 for(i=0; i<lincsd->nc; i++)
1507                 {
1508                     if (lincsd->bllen[i] == 0)
1509                     {
1510                         lincsd->bllen[i] =
1511                             sqrt(distance2(x[lincsd->bla[2*i]],
1512                                            x[lincsd->bla[2*i+1]]));
1513                     }
1514                 }
1515             }
1516         }
1517
1518         if (bLog && fplog)
1519         {
1520             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1521                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1522         }
1523
1524         /* The (only) OpenMP parallel region of constrain_lincs */
1525         {
1526             int th;
1527
1528 #pragma omp parallel for num_threads(lincsd->nth) schedule(static)
1529             for(th=0; th<lincsd->nth; th++)
1530             {
1531                 do_lincs(x,xprime,box,pbc,lincsd,th,
1532                          md->invmass,cr,
1533                          bCalcVir || (ir->efep != efepNO),
1534                          ir->LincsWarnAngle,&warn,
1535                          invdt,v,bCalcVir,rmdr);
1536             }
1537         }
1538
1539         if (ir->efep != efepNO)
1540         {
1541             real dt_2,dvdl=0;
1542
1543             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1544             for(i=0; (i<lincsd->nc); i++)
1545             {
1546                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1547             }
1548             *dvdlambda += dvdl;
1549                 }
1550
1551         if (bLog && fplog && lincsd->nc > 0)
1552         {
1553             fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1554             fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
1555                     sqrt(p_ssd/ncons_loc),p_max,
1556                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1557                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1558         }
1559         if (bLog || bEner)
1560         {
1561             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1562                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1563             /* Check if we are doing the second part of SD */
1564             if (ir->eI == eiSD2 && v == NULL)
1565             {
1566                 i = 2;
1567             }
1568             else
1569             {
1570                 i = 1;
1571             }
1572             lincsd->rmsd_data[0] = ncons_loc;
1573             lincsd->rmsd_data[i] = p_ssd;
1574         }
1575         else
1576         {
1577             lincsd->rmsd_data[0] = 0;
1578             lincsd->rmsd_data[1] = 0;
1579             lincsd->rmsd_data[2] = 0;
1580         }
1581         if (bLog && fplog && lincsd->nc > 0)
1582         {
1583             fprintf(fplog,
1584                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1585                     sqrt(p_ssd/ncons_loc),p_max,
1586                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1587                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1588         }
1589
1590         if (warn > 0)
1591         {
1592             if (maxwarn >= 0)
1593             {
1594                 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1595                         &ncons_loc,&p_ssd,&p_max,&p_imax);
1596                 if (MULTISIM(cr))
1597                 {
1598                     sprintf(buf3," in simulation %d", cr->ms->sim);
1599                 }
1600                 else
1601                 {
1602                     buf3[0] = 0;
1603                 }
1604                 sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1605                         "relative constraint deviation after LINCS:\n"
1606                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1607                         gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1608                         buf3,
1609                         sqrt(p_ssd/ncons_loc),p_max,
1610                         ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1611                         ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1612                 if (fplog)
1613                 {
1614                     fprintf(fplog,"%s",buf);
1615                 }
1616                 fprintf(stderr,"%s",buf);
1617                 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1618                               lincsd->nc,lincsd->bla,lincsd->bllen,
1619                               ir->LincsWarnAngle,maxwarn,warncount);
1620             }
1621             bOK = (p_max < 0.5);
1622         }
1623
1624         if (lincsd->ncg_flex) {
1625             for(i=0; (i<lincsd->nc); i++)
1626                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1627                     lincsd->bllen[i] = 0;
1628         }
1629     }
1630     else
1631     {
1632         do_lincsp(x,xprime,min_proj,pbc,lincsd,md->invmass,econq,dvdlambda,
1633                   bCalcVir,rmdr);
1634     }
1635
1636     /* count assuming nit=1 */
1637     inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1638     inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1639     if (lincsd->ntriangle > 0)
1640     {
1641         inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1642     }
1643     if (v)
1644     {
1645         inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1646     }
1647     if (bCalcVir)
1648     {
1649         inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
1650     }
1651
1652     return bOK;
1653 }