2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /* This file is completely threadsafe - keep it that way! */
49 #include "gromacs/domdec/domdec.h"
50 #include "gromacs/domdec/domdec_struct.h"
51 #include "gromacs/gmxlib/nrnb.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/units.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/mdlib/constr.h"
56 #include "gromacs/mdlib/gmx_omp_nthreads.h"
57 #include "gromacs/mdlib/mdrun.h"
58 #include "gromacs/mdtypes/commrec.h"
59 #include "gromacs/mdtypes/inputrec.h"
60 #include "gromacs/mdtypes/md_enums.h"
61 #include "gromacs/pbcutil/pbc.h"
62 #include "gromacs/pbcutil/pbc-simd.h"
63 #include "gromacs/simd/simd.h"
64 #include "gromacs/simd/simd_math.h"
65 #include "gromacs/simd/vector_operations.h"
66 #include "gromacs/topology/block.h"
67 #include "gromacs/topology/mtop_util.h"
68 #include "gromacs/utility/basedefinitions.h"
69 #include "gromacs/utility/bitmask.h"
70 #include "gromacs/utility/cstringutil.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxomp.h"
74 #include "gromacs/utility/pleasecite.h"
75 #include "gromacs/utility/smalloc.h"
77 using namespace gmx
; // TODO: Remove when this file is moved into gmx namespace
80 int b0
; /* first constraint for this task */
81 int b1
; /* b1-1 is the last constraint for this task */
82 int ntriangle
; /* the number of constraints in triangles */
83 int *triangle
; /* the list of triangle constraints */
84 int *tri_bits
; /* the bits tell if the matrix element should be used */
85 int tri_alloc
; /* allocation size of triangle and tri_bits */
86 int nind
; /* number of indices */
87 int *ind
; /* constraint index for updating atom data */
88 int nind_r
; /* number of indices */
89 int *ind_r
; /* constraint index for updating atom data */
90 int ind_nalloc
; /* allocation size of ind and ind_r */
91 tensor vir_r_m_dr
; /* temporary variable for virial calculation */
92 real dhdlambda
; /* temporary variable for lambda derivative */
95 typedef struct gmx_lincsdata
{
96 int ncg
; /* the global number of constraints */
97 int ncg_flex
; /* the global number of flexible constraints */
98 int ncg_triangle
; /* the global number of constraints in triangles */
99 int nIter
; /* the number of iterations */
100 int nOrder
; /* the order of the matrix expansion */
101 int max_connect
; /* the maximum number of constrains connected to a single atom */
103 int nc_real
; /* the number of real constraints */
104 int nc
; /* the number of constraints including padding for SIMD */
105 int nc_alloc
; /* the number we allocated memory for */
106 int ncc
; /* the number of constraint connections */
107 int ncc_alloc
; /* the number we allocated memory for */
108 real matlam
; /* the FE lambda value used for filling blc and blmf */
109 int *con_index
; /* mapping from topology to LINCS constraints */
110 real
*bllen0
; /* the reference distance in topology A */
111 real
*ddist
; /* the reference distance in top B - the r.d. in top A */
112 int *bla
; /* the atom pairs involved in the constraints */
113 real
*blc
; /* 1/sqrt(invmass1 + invmass2) */
114 real
*blc1
; /* as blc, but with all masses 1 */
115 int *blnr
; /* index into blbnb and blmf */
116 int *blbnb
; /* list of constraint connections */
117 int ntriangle
; /* the local number of constraints in triangles */
118 int ncc_triangle
; /* the number of constraint connections in triangles */
119 gmx_bool bCommIter
; /* communicate before each LINCS interation */
120 real
*blmf
; /* matrix of mass factors for constraint connections */
121 real
*blmf1
; /* as blmf, but with all masses 1 */
122 real
*bllen
; /* the reference bond length */
123 int *nlocat
; /* the local atom count per constraint, can be NULL */
125 int ntask
; /* The number of tasks = #threads for LINCS */
126 lincs_task_t
*task
; /* LINCS thread division */
127 gmx_bitmask_t
*atf
; /* atom flags for thread parallelization */
128 int atf_nalloc
; /* allocation size of atf */
129 gmx_bool bTaskDep
; /* are the LINCS tasks interdependent? */
130 gmx_bool bTaskDepTri
; /* are there triangle constraints that cross task borders? */
131 /* arrays for temporary storage in the LINCS algorithm */
138 real
*mlambda
; /* the Lagrange multipliers * -1 */
139 /* storage for the constraint RMS relative deviation output */
143 /* Define simd_width for memory allocation used for SIMD code */
144 #if GMX_SIMD_HAVE_REAL
145 static const int simd_width
= GMX_SIMD_REAL_WIDTH
;
147 static const int simd_width
= 1;
150 /* Align to 128 bytes, consistent with the current implementation of
151 AlignedAllocator, which currently forces 128 byte alignment. */
152 static const int align_bytes
= 128;
154 real
*lincs_rmsd_data(struct gmx_lincsdata
*lincsd
)
156 return lincsd
->rmsd_data
;
159 real
lincs_rmsd(struct gmx_lincsdata
*lincsd
)
161 if (lincsd
->rmsd_data
[0] > 0)
163 return std::sqrt(lincsd
->rmsd_data
[1]/lincsd
->rmsd_data
[0]);
171 /* Do a set of nrec LINCS matrix multiplications.
172 * This function will return with up to date thread-local
173 * constraint data, without an OpenMP barrier.
175 static void lincs_matrix_expand(const struct gmx_lincsdata
*lincsd
,
176 const lincs_task_t
*li_task
,
178 real
*rhs1
, real
*rhs2
, real
*sol
)
180 int b0
, b1
, nrec
, rec
;
181 const int *blnr
= lincsd
->blnr
;
182 const int *blbnb
= lincsd
->blbnb
;
186 nrec
= lincsd
->nOrder
;
188 for (rec
= 0; rec
< nrec
; rec
++)
192 if (lincsd
->bTaskDep
)
196 for (b
= b0
; b
< b1
; b
++)
202 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
204 mvb
= mvb
+ blcc
[n
]*rhs1
[blbnb
[n
]];
207 sol
[b
] = sol
[b
] + mvb
;
215 } /* nrec*(ncons+2*nrtot) flops */
217 if (lincsd
->ntriangle
> 0)
219 /* Perform an extra nrec recursions for only the constraints
220 * involved in rigid triangles.
221 * In this way their accuracy should come close to those of the other
222 * constraints, since traingles of constraints can produce eigenvalues
223 * around 0.7, while the effective eigenvalue for bond constraints
224 * is around 0.4 (and 0.7*0.7=0.5).
227 if (lincsd
->bTaskDep
)
229 /* We need a barrier here, since other threads might still be
230 * reading the contents of rhs1 and/o rhs2.
231 * We could avoid this barrier by introducing two extra rhs
232 * arrays for the triangle constraints only.
237 /* Constraints involved in a triangle are ensured to be in the same
238 * LINCS task. This means no barriers are required during the extra
239 * iterations for the triangle constraints.
241 const int *triangle
= li_task
->triangle
;
242 const int *tri_bits
= li_task
->tri_bits
;
244 for (rec
= 0; rec
< nrec
; rec
++)
248 for (tb
= 0; tb
< li_task
->ntriangle
; tb
++)
250 int b
, bits
, nr0
, nr1
, n
;
258 for (n
= nr0
; n
< nr1
; n
++)
260 if (bits
& (1 << (n
- nr0
)))
262 mvb
= mvb
+ blcc
[n
]*rhs1
[blbnb
[n
]];
266 sol
[b
] = sol
[b
] + mvb
;
274 } /* nrec*(ntriangle + ncc_triangle*2) flops */
276 if (lincsd
->bTaskDepTri
)
278 /* The constraints triangles are decoupled from each other,
279 * but constraints in one triangle cross thread task borders.
280 * We could probably avoid this with more advanced setup code.
287 static void lincs_update_atoms_noind(int ncons
, const int *bla
,
289 const real
*fac
, rvec
*r
,
294 real mvb
, im1
, im2
, tmp0
, tmp1
, tmp2
;
296 if (invmass
!= nullptr)
298 for (b
= 0; b
< ncons
; b
++)
314 } /* 16 ncons flops */
318 for (b
= 0; b
< ncons
; b
++)
336 static void lincs_update_atoms_ind(int ncons
, const int *ind
, const int *bla
,
338 const real
*fac
, rvec
*r
,
343 real mvb
, im1
, im2
, tmp0
, tmp1
, tmp2
;
345 if (invmass
!= nullptr)
347 for (bi
= 0; bi
< ncons
; bi
++)
364 } /* 16 ncons flops */
368 for (bi
= 0; bi
< ncons
; bi
++)
383 } /* 16 ncons flops */
387 static void lincs_update_atoms(struct gmx_lincsdata
*li
, int th
,
389 const real
*fac
, rvec
*r
,
395 /* Single thread, we simply update for all constraints */
396 lincs_update_atoms_noind(li
->nc_real
,
397 li
->bla
, prefac
, fac
, r
, invmass
, x
);
401 /* Update the atom vector components for our thread local
402 * constraints that only access our local atom range.
403 * This can be done without a barrier.
405 lincs_update_atoms_ind(li
->task
[th
].nind
, li
->task
[th
].ind
,
406 li
->bla
, prefac
, fac
, r
, invmass
, x
);
408 if (li
->task
[li
->ntask
].nind
> 0)
410 /* Update the constraints that operate on atoms
411 * in multiple thread atom blocks on the master thread.
416 lincs_update_atoms_ind(li
->task
[li
->ntask
].nind
,
417 li
->task
[li
->ntask
].ind
,
418 li
->bla
, prefac
, fac
, r
, invmass
, x
);
424 #if GMX_SIMD_HAVE_REAL
425 /* Calculate the constraint distance vectors r to project on from x.
426 * Determine the right-hand side of the matrix equation using quantity f.
427 * This function only differs from calc_dr_x_xp_simd below in that
428 * no constraint length is subtracted and no PBC is used for f.
430 static void gmx_simdcall
431 calc_dr_x_f_simd(int b0
,
434 const rvec
* gmx_restrict x
,
435 const rvec
* gmx_restrict f
,
436 const real
* gmx_restrict blc
,
437 const real
* pbc_simd
,
438 rvec
* gmx_restrict r
,
439 real
* gmx_restrict rhs
,
440 real
* gmx_restrict sol
)
442 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
444 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset2
[GMX_SIMD_REAL_WIDTH
];
446 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
451 for (int bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
453 SimdReal x0_S
, y0_S
, z0_S
;
454 SimdReal x1_S
, y1_S
, z1_S
;
455 SimdReal rx_S
, ry_S
, rz_S
, n2_S
, il_S
;
456 SimdReal fx_S
, fy_S
, fz_S
, ip_S
, rhs_S
;
457 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset0
[GMX_SIMD_REAL_WIDTH
];
458 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset1
[GMX_SIMD_REAL_WIDTH
];
460 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
462 offset0
[i
] = bla
[bs
*2 + i
*2];
463 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
466 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
467 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
472 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
474 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
475 il_S
= invsqrt(n2_S
);
481 transposeScatterStoreU
<3>(reinterpret_cast<real
*>(r
+ bs
), offset2
, rx_S
, ry_S
, rz_S
);
483 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(f
), offset0
, &x0_S
, &y0_S
, &z0_S
);
484 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(f
), offset1
, &x1_S
, &y1_S
, &z1_S
);
489 ip_S
= iprod(rx_S
, ry_S
, rz_S
, fx_S
, fy_S
, fz_S
);
491 rhs_S
= load(blc
+ bs
) * ip_S
;
493 store(rhs
+ bs
, rhs_S
);
494 store(sol
+ bs
, rhs_S
);
497 #endif // GMX_SIMD_HAVE_REAL
499 /* LINCS projection, works on derivatives of the coordinates */
500 static void do_lincsp(rvec
*x
, rvec
*f
, rvec
*fp
, t_pbc
*pbc
,
501 struct gmx_lincsdata
*lincsd
, int th
,
503 int econq
, gmx_bool bCalcDHDL
,
504 gmx_bool bCalcVir
, tensor rmdf
)
507 int *bla
, *blnr
, *blbnb
;
509 real
*blc
, *blmf
, *blcc
, *rhs1
, *rhs2
, *sol
;
511 b0
= lincsd
->task
[th
].b0
;
512 b1
= lincsd
->task
[th
].b1
;
517 blbnb
= lincsd
->blbnb
;
518 if (econq
!= econqForce
)
520 /* Use mass-weighted parameters */
526 /* Use non mass-weighted parameters */
528 blmf
= lincsd
->blmf1
;
530 blcc
= lincsd
->tmpncc
;
535 #if GMX_SIMD_HAVE_REAL
536 /* This SIMD code does the same as the plain-C code after the #else.
537 * The only difference is that we always call pbc code, as with SIMD
538 * the overhead of pbc computation (when not needed) is small.
540 GMX_ALIGNED(real
, GMX_SIMD_REAL_WIDTH
) pbc_simd
[9*GMX_SIMD_REAL_WIDTH
];
542 /* Convert the pbc struct for SIMD */
543 set_pbc_simd(pbc
, pbc_simd
);
545 /* Compute normalized x i-j vectors, store in r.
546 * Compute the inner product of r and xp i-j and store in rhs1.
548 calc_dr_x_f_simd(b0
, b1
, bla
, x
, f
, blc
,
552 #else // GMX_SIMD_HAVE_REAL
554 /* Compute normalized i-j vectors */
557 for (b
= b0
; b
< b1
; b
++)
561 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
567 for (b
= b0
; b
< b1
; b
++)
571 rvec_sub(x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
573 } /* 16 ncons flops */
576 for (b
= b0
; b
< b1
; b
++)
583 mvb
= blc
[b
]*(r
[b
][0]*(f
[i
][0] - f
[j
][0]) +
584 r
[b
][1]*(f
[i
][1] - f
[j
][1]) +
585 r
[b
][2]*(f
[i
][2] - f
[j
][2]));
591 #endif // GMX_SIMD_HAVE_REAL
593 if (lincsd
->bTaskDep
)
595 /* We need a barrier, since the matrix construction below
596 * can access entries in r of other threads.
601 /* Construct the (sparse) LINCS matrix */
602 for (b
= b0
; b
< b1
; b
++)
606 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
608 blcc
[n
] = blmf
[n
]*iprod(r
[b
], r
[blbnb
[n
]]);
611 /* Together: 23*ncons + 6*nrtot flops */
613 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
614 /* nrec*(ncons+2*nrtot) flops */
616 if (econq
== econqDeriv_FlexCon
)
618 /* We only want to constraint the flexible constraints,
619 * so we mask out the normal ones by setting sol to 0.
621 for (b
= b0
; b
< b1
; b
++)
623 if (!(lincsd
->bllen0
[b
] == 0 && lincsd
->ddist
[b
] == 0))
630 /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
631 for (b
= b0
; b
< b1
; b
++)
636 /* When constraining forces, we should not use mass weighting,
637 * so we pass invmass=NULL, which results in the use of 1 for all atoms.
639 lincs_update_atoms(lincsd
, th
, 1.0, sol
, r
,
640 (econq
!= econqForce
) ? invmass
: nullptr, fp
);
647 for (b
= b0
; b
< b1
; b
++)
649 dhdlambda
-= sol
[b
]*lincsd
->ddist
[b
];
652 lincsd
->task
[th
].dhdlambda
= dhdlambda
;
657 /* Constraint virial,
658 * determines sum r_bond x delta f,
659 * where delta f is the constraint correction
660 * of the quantity that is being constrained.
662 for (b
= b0
; b
< b1
; b
++)
667 mvb
= lincsd
->bllen
[b
]*sol
[b
];
668 for (i
= 0; i
< DIM
; i
++)
671 for (j
= 0; j
< DIM
; j
++)
673 rmdf
[i
][j
] += tmp1
*r
[b
][j
];
676 } /* 23 ncons flops */
680 #if GMX_SIMD_HAVE_REAL
681 /* Calculate the constraint distance vectors r to project on from x.
682 * Determine the right-hand side of the matrix equation using coordinates xp.
684 static void gmx_simdcall
685 calc_dr_x_xp_simd(int b0
,
688 const rvec
* gmx_restrict x
,
689 const rvec
* gmx_restrict xp
,
690 const real
* gmx_restrict bllen
,
691 const real
* gmx_restrict blc
,
692 const real
* pbc_simd
,
693 rvec
* gmx_restrict r
,
694 real
* gmx_restrict rhs
,
695 real
* gmx_restrict sol
)
697 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
698 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset2
[GMX_SIMD_REAL_WIDTH
];
700 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
705 for (int bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
707 SimdReal x0_S
, y0_S
, z0_S
;
708 SimdReal x1_S
, y1_S
, z1_S
;
709 SimdReal rx_S
, ry_S
, rz_S
, n2_S
, il_S
;
710 SimdReal rxp_S
, ryp_S
, rzp_S
, ip_S
, rhs_S
;
711 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset0
[GMX_SIMD_REAL_WIDTH
];
712 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset1
[GMX_SIMD_REAL_WIDTH
];
714 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
716 offset0
[i
] = bla
[bs
*2 + i
*2];
717 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
720 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
721 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
726 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
728 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
729 il_S
= invsqrt(n2_S
);
735 transposeScatterStoreU
<3>(reinterpret_cast<real
*>(r
+ bs
), offset2
, rx_S
, ry_S
, rz_S
);
737 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(xp
), offset0
, &x0_S
, &y0_S
, &z0_S
);
738 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(xp
), offset1
, &x1_S
, &y1_S
, &z1_S
);
743 pbc_correct_dx_simd(&rxp_S
, &ryp_S
, &rzp_S
, pbc_simd
);
745 ip_S
= iprod(rx_S
, ry_S
, rz_S
, rxp_S
, ryp_S
, rzp_S
);
747 rhs_S
= load(blc
+ bs
) * (ip_S
- load(bllen
+ bs
));
749 store(rhs
+ bs
, rhs_S
);
750 store(sol
+ bs
, rhs_S
);
753 #endif // GMX_SIMD_HAVE_REAL
755 /* Determine the distances and right-hand side for the next iteration */
756 static void calc_dist_iter(int b0
,
759 const rvec
* gmx_restrict xp
,
760 const real
* gmx_restrict bllen
,
761 const real
* gmx_restrict blc
,
764 real
* gmx_restrict rhs
,
765 real
* gmx_restrict sol
,
770 for (b
= b0
; b
< b1
; b
++)
772 real len
, len2
, dlen2
, mvb
;
778 pbc_dx_aiuc(pbc
, xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
782 rvec_sub(xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
785 dlen2
= 2*len2
- norm2(dx
);
786 if (dlen2
< wfac
*len2
)
788 /* not race free - see detailed comment in caller */
793 mvb
= blc
[b
]*(len
- dlen2
*gmx::invsqrt(dlen2
));
801 } /* 20*ncons flops */
804 #if GMX_SIMD_HAVE_REAL
805 /* As the function above, but using SIMD intrinsics */
806 static void gmx_simdcall
807 calc_dist_iter_simd(int b0
,
810 const rvec
* gmx_restrict x
,
811 const real
* gmx_restrict bllen
,
812 const real
* gmx_restrict blc
,
813 const real
* pbc_simd
,
815 real
* gmx_restrict rhs
,
816 real
* gmx_restrict sol
,
819 SimdReal
min_S(GMX_REAL_MIN
);
821 SimdReal
wfac_S(wfac
);
826 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
828 /* Initialize all to FALSE */
829 warn_S
= (two_S
< setZero());
831 for (bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
833 SimdReal x0_S
, y0_S
, z0_S
;
834 SimdReal x1_S
, y1_S
, z1_S
;
835 SimdReal rx_S
, ry_S
, rz_S
, n2_S
;
836 SimdReal len_S
, len2_S
, dlen2_S
, lc_S
, blc_S
;
837 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset0
[GMX_SIMD_REAL_WIDTH
];
838 GMX_ALIGNED(int, GMX_SIMD_REAL_WIDTH
) offset1
[GMX_SIMD_REAL_WIDTH
];
840 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
842 offset0
[i
] = bla
[bs
*2 + i
*2];
843 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
846 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
847 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
852 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
854 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
856 len_S
= load(bllen
+ bs
);
857 len2_S
= len_S
* len_S
;
859 dlen2_S
= fms(two_S
, len2_S
, n2_S
);
861 warn_S
= warn_S
|| (dlen2_S
< (wfac_S
* len2_S
));
863 /* Avoid 1/0 by taking the max with REAL_MIN.
864 * Note: when dlen2 is close to zero (90 degree constraint rotation),
865 * the accuracy of the algorithm is no longer relevant.
867 dlen2_S
= max(dlen2_S
, min_S
);
869 lc_S
= fnma(dlen2_S
, invsqrt(dlen2_S
), len_S
);
871 blc_S
= load(blc
+ bs
);
875 store(rhs
+ bs
, lc_S
);
876 store(sol
+ bs
, lc_S
);
884 #endif // GMX_SIMD_HAVE_REAL
886 static void do_lincs(rvec
*x
, rvec
*xp
, matrix box
, t_pbc
*pbc
,
887 struct gmx_lincsdata
*lincsd
, int th
,
891 real wangle
, gmx_bool
*bWarn
,
892 real invdt
, rvec
* gmx_restrict v
,
893 gmx_bool bCalcVir
, tensor vir_r_m_dr
)
895 int b0
, b1
, b
, i
, j
, n
, iter
;
896 int *bla
, *blnr
, *blbnb
;
898 real
*blc
, *blmf
, *bllen
, *blcc
, *rhs1
, *rhs2
, *sol
, *blc_sol
, *mlambda
;
901 b0
= lincsd
->task
[th
].b0
;
902 b1
= lincsd
->task
[th
].b1
;
907 blbnb
= lincsd
->blbnb
;
910 bllen
= lincsd
->bllen
;
911 blcc
= lincsd
->tmpncc
;
915 blc_sol
= lincsd
->tmp4
;
916 mlambda
= lincsd
->mlambda
;
917 nlocat
= lincsd
->nlocat
;
919 #if GMX_SIMD_HAVE_REAL
921 /* This SIMD code does the same as the plain-C code after the #else.
922 * The only difference is that we always call pbc code, as with SIMD
923 * the overhead of pbc computation (when not needed) is small.
925 GMX_ALIGNED(real
, GMX_SIMD_REAL_WIDTH
) pbc_simd
[9*GMX_SIMD_REAL_WIDTH
];
927 /* Convert the pbc struct for SIMD */
928 set_pbc_simd(pbc
, pbc_simd
);
930 /* Compute normalized x i-j vectors, store in r.
931 * Compute the inner product of r and xp i-j and store in rhs1.
933 calc_dr_x_xp_simd(b0
, b1
, bla
, x
, xp
, bllen
, blc
,
937 #else // GMX_SIMD_HAVE_REAL
941 /* Compute normalized i-j vectors */
942 for (b
= b0
; b
< b1
; b
++)
947 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
950 pbc_dx_aiuc(pbc
, xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
951 mvb
= blc
[b
]*(iprod(r
[b
], dx
) - bllen
[b
]);
958 /* Compute normalized i-j vectors */
959 for (b
= b0
; b
< b1
; b
++)
961 real tmp0
, tmp1
, tmp2
, rlen
, mvb
;
965 tmp0
= x
[i
][0] - x
[j
][0];
966 tmp1
= x
[i
][1] - x
[j
][1];
967 tmp2
= x
[i
][2] - x
[j
][2];
968 rlen
= gmx::invsqrt(tmp0
*tmp0
+ tmp1
*tmp1
+ tmp2
*tmp2
);
976 mvb
= blc
[b
]*(r
[b
][0]*(xp
[i
][0] - xp
[j
][0]) +
977 r
[b
][1]*(xp
[i
][1] - xp
[j
][1]) +
978 r
[b
][2]*(xp
[i
][2] - xp
[j
][2]) - bllen
[b
]);
983 /* Together: 26*ncons + 6*nrtot flops */
986 #endif // GMX_SIMD_HAVE_REAL
988 if (lincsd
->bTaskDep
)
990 /* We need a barrier, since the matrix construction below
991 * can access entries in r of other threads.
996 /* Construct the (sparse) LINCS matrix */
997 for (b
= b0
; b
< b1
; b
++)
999 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
1001 blcc
[n
] = blmf
[n
]*iprod(r
[b
], r
[blbnb
[n
]]);
1004 /* Together: 26*ncons + 6*nrtot flops */
1006 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
1007 /* nrec*(ncons+2*nrtot) flops */
1009 #if GMX_SIMD_HAVE_REAL
1010 for (b
= b0
; b
< b1
; b
+= GMX_SIMD_REAL_WIDTH
)
1012 SimdReal t1
= load(blc
+ b
);
1013 SimdReal t2
= load(sol
+ b
);
1014 store(mlambda
+ b
, t1
* t2
);
1017 for (b
= b0
; b
< b1
; b
++)
1019 mlambda
[b
] = blc
[b
]*sol
[b
];
1021 #endif // GMX_SIMD_HAVE_REAL
1023 /* Update the coordinates */
1024 lincs_update_atoms(lincsd
, th
, 1.0, mlambda
, r
, invmass
, xp
);
1027 ******** Correction for centripetal effects ********
1032 wfac
= std::cos(DEG2RAD
*wangle
);
1035 for (iter
= 0; iter
< lincsd
->nIter
; iter
++)
1037 if ((lincsd
->bCommIter
&& DOMAINDECOMP(cr
) && cr
->dd
->constraints
))
1042 /* Communicate the corrected non-local coordinates */
1043 if (DOMAINDECOMP(cr
))
1045 dd_move_x_constraints(cr
->dd
, box
, xp
, nullptr, FALSE
);
1050 else if (lincsd
->bTaskDep
)
1055 #if GMX_SIMD_HAVE_REAL
1056 calc_dist_iter_simd(b0
, b1
, bla
, xp
, bllen
, blc
, pbc_simd
, wfac
,
1059 calc_dist_iter(b0
, b1
, bla
, xp
, bllen
, blc
, pbc
, wfac
,
1061 /* 20*ncons flops */
1062 #endif // GMX_SIMD_HAVE_REAL
1064 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
1065 /* nrec*(ncons+2*nrtot) flops */
1067 #if GMX_SIMD_HAVE_REAL
1068 for (b
= b0
; b
< b1
; b
+= GMX_SIMD_REAL_WIDTH
)
1070 SimdReal t1
= load(blc
+ b
);
1071 SimdReal t2
= load(sol
+ b
);
1072 SimdReal mvb
= t1
* t2
;
1073 store(blc_sol
+ b
, mvb
);
1074 store(mlambda
+ b
, load(mlambda
+ b
) + mvb
);
1077 for (b
= b0
; b
< b1
; b
++)
1081 mvb
= blc
[b
]*sol
[b
];
1085 #endif // GMX_SIMD_HAVE_REAL
1087 /* Update the coordinates */
1088 lincs_update_atoms(lincsd
, th
, 1.0, blc_sol
, r
, invmass
, xp
);
1090 /* nit*ncons*(37+9*nrec) flops */
1094 /* Update the velocities */
1095 lincs_update_atoms(lincsd
, th
, invdt
, mlambda
, r
, invmass
, v
);
1096 /* 16 ncons flops */
1099 if (nlocat
!= nullptr && (bCalcDHDL
|| bCalcVir
))
1101 if (lincsd
->bTaskDep
)
1103 /* In lincs_update_atoms threads might cross-read mlambda */
1107 /* Only account for local atoms */
1108 for (b
= b0
; b
< b1
; b
++)
1110 mlambda
[b
] *= 0.5*nlocat
[b
];
1119 for (b
= b0
; b
< b1
; b
++)
1121 /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
1122 * later after the contributions are reduced over the threads.
1124 dhdl
-= lincsd
->mlambda
[b
]*lincsd
->ddist
[b
];
1126 lincsd
->task
[th
].dhdlambda
= dhdl
;
1131 /* Constraint virial */
1132 for (b
= b0
; b
< b1
; b
++)
1136 tmp0
= -bllen
[b
]*mlambda
[b
];
1137 for (i
= 0; i
< DIM
; i
++)
1139 tmp1
= tmp0
*r
[b
][i
];
1140 for (j
= 0; j
< DIM
; j
++)
1142 vir_r_m_dr
[i
][j
] -= tmp1
*r
[b
][j
];
1145 } /* 22 ncons flops */
1149 * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
1150 * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
1152 * (26+nrec)*ncons + (6+2*nrec)*nrtot
1153 * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
1155 * (63+nrec)*ncons + (6+4*nrec)*nrtot
1159 /* Sets the elements in the LINCS matrix for task li_task */
1160 static void set_lincs_matrix_task(struct gmx_lincsdata
*li
,
1161 lincs_task_t
*li_task
,
1162 const real
*invmass
,
1164 int *nCrossTaskTriangles
)
1168 /* Construct the coupling coefficient matrix blmf */
1169 li_task
->ntriangle
= 0;
1171 *nCrossTaskTriangles
= 0;
1172 for (i
= li_task
->b0
; i
< li_task
->b1
; i
++)
1177 a2
= li
->bla
[2*i
+1];
1178 for (n
= li
->blnr
[i
]; (n
< li
->blnr
[i
+1]); n
++)
1180 int k
, sign
, center
, end
;
1184 /* If we are using multiple, independent tasks for LINCS,
1185 * the calls to check_assign_connected should have
1186 * put all connected constraints in our task.
1188 assert(li
->bTaskDep
|| (k
>= li_task
->b0
&& k
< li_task
->b1
));
1190 if (a1
== li
->bla
[2*k
] || a2
== li
->bla
[2*k
+1])
1198 if (a1
== li
->bla
[2*k
] || a1
== li
->bla
[2*k
+1])
1208 li
->blmf
[n
] = sign
*invmass
[center
]*li
->blc
[i
]*li
->blc
[k
];
1209 li
->blmf1
[n
] = sign
*0.5;
1210 if (li
->ncg_triangle
> 0)
1214 /* Look for constraint triangles */
1215 for (nk
= li
->blnr
[k
]; (nk
< li
->blnr
[k
+1]); nk
++)
1218 if (kk
!= i
&& kk
!= k
&&
1219 (li
->bla
[2*kk
] == end
|| li
->bla
[2*kk
+1] == end
))
1221 /* Check if the constraints in this triangle actually
1222 * belong to a different task. We still assign them
1223 * here, since it's convenient for the triangle
1224 * iterations, but we then need an extra barrier.
1226 if (k
< li_task
->b0
|| k
>= li_task
->b1
||
1227 kk
< li_task
->b0
|| kk
>= li_task
->b1
)
1229 (*nCrossTaskTriangles
)++;
1232 if (li_task
->ntriangle
== 0 ||
1233 li_task
->triangle
[li_task
->ntriangle
- 1] < i
)
1235 /* Add this constraint to the triangle list */
1236 li_task
->triangle
[li_task
->ntriangle
] = i
;
1237 li_task
->tri_bits
[li_task
->ntriangle
] = 0;
1238 li_task
->ntriangle
++;
1239 if (li
->blnr
[i
+1] - li
->blnr
[i
] > static_cast<int>(sizeof(li_task
->tri_bits
[0])*8 - 1))
1241 gmx_fatal(FARGS
, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
1242 li
->blnr
[i
+1] - li
->blnr
[i
],
1243 sizeof(li_task
->tri_bits
[0])*8-1);
1246 li_task
->tri_bits
[li_task
->ntriangle
-1] |= (1 << (n
- li
->blnr
[i
]));
1255 /* Sets the elements in the LINCS matrix */
1256 void set_lincs_matrix(struct gmx_lincsdata
*li
, real
*invmass
, real lambda
)
1259 const real invsqrt2
= 0.7071067811865475244;
1261 for (i
= 0; (i
< li
->nc
); i
++)
1266 a2
= li
->bla
[2*i
+1];
1267 li
->blc
[i
] = gmx::invsqrt(invmass
[a1
] + invmass
[a2
]);
1268 li
->blc1
[i
] = invsqrt2
;
1271 /* Construct the coupling coefficient matrix blmf */
1272 int th
, ntriangle
= 0, ncc_triangle
= 0, nCrossTaskTriangles
= 0;
1273 #pragma omp parallel for reduction(+: ntriangle, ncc_triangle, nCrossTaskTriangles) num_threads(li->ntask) schedule(static)
1274 for (th
= 0; th
< li
->ntask
; th
++)
1278 set_lincs_matrix_task(li
, &li
->task
[th
], invmass
,
1279 &ncc_triangle
, &nCrossTaskTriangles
);
1280 ntriangle
= li
->task
[th
].ntriangle
;
1282 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
1284 li
->ntriangle
= ntriangle
;
1285 li
->ncc_triangle
= ncc_triangle
;
1286 li
->bTaskDepTri
= (nCrossTaskTriangles
> 0);
1290 fprintf(debug
, "The %d constraints participate in %d triangles\n",
1291 li
->nc
, li
->ntriangle
);
1292 fprintf(debug
, "There are %d constraint couplings, of which %d in triangles\n",
1293 li
->ncc
, li
->ncc_triangle
);
1294 if (li
->ntriangle
> 0 && li
->ntask
> 1)
1296 fprintf(debug
, "%d constraint triangles contain constraints assigned to different tasks\n",
1297 nCrossTaskTriangles
);
1302 * so we know with which lambda value the masses have been set.
1304 li
->matlam
= lambda
;
1307 static int count_triangle_constraints(const t_ilist
*ilist
,
1308 const t_blocka
*at2con
)
1310 int ncon1
, ncon_tot
;
1311 int c0
, a00
, a01
, n1
, c1
, a10
, a11
, ac1
, n2
, c2
, a20
, a21
;
1314 t_iatom
*ia1
, *ia2
, *iap
;
1316 ncon1
= ilist
[F_CONSTR
].nr
/3;
1317 ncon_tot
= ncon1
+ ilist
[F_CONSTRNC
].nr
/3;
1319 ia1
= ilist
[F_CONSTR
].iatoms
;
1320 ia2
= ilist
[F_CONSTRNC
].iatoms
;
1323 for (c0
= 0; c0
< ncon_tot
; c0
++)
1326 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c0
);
1329 for (n1
= at2con
->index
[a01
]; n1
< at2con
->index
[a01
+1]; n1
++)
1334 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c1
);
1345 for (n2
= at2con
->index
[ac1
]; n2
< at2con
->index
[ac1
+1]; n2
++)
1348 if (c2
!= c0
&& c2
!= c1
)
1350 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c2
);
1353 if (a20
== a00
|| a21
== a00
)
1367 return ncon_triangle
;
1370 static gmx_bool
more_than_two_sequential_constraints(const t_ilist
*ilist
,
1371 const t_blocka
*at2con
)
1373 t_iatom
*ia1
, *ia2
, *iap
;
1374 int ncon1
, ncon_tot
, c
;
1376 gmx_bool bMoreThanTwoSequentialConstraints
;
1378 ncon1
= ilist
[F_CONSTR
].nr
/3;
1379 ncon_tot
= ncon1
+ ilist
[F_CONSTRNC
].nr
/3;
1381 ia1
= ilist
[F_CONSTR
].iatoms
;
1382 ia2
= ilist
[F_CONSTRNC
].iatoms
;
1384 bMoreThanTwoSequentialConstraints
= FALSE
;
1385 for (c
= 0; c
< ncon_tot
&& !bMoreThanTwoSequentialConstraints
; c
++)
1387 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c
);
1390 /* Check if this constraint has constraints connected at both atoms */
1391 if (at2con
->index
[a1
+1] - at2con
->index
[a1
] > 1 &&
1392 at2con
->index
[a2
+1] - at2con
->index
[a2
] > 1)
1394 bMoreThanTwoSequentialConstraints
= TRUE
;
1398 return bMoreThanTwoSequentialConstraints
;
1401 static int int_comp(const void *a
, const void *b
)
1403 return (*(int *)a
) - (*(int *)b
);
1406 gmx_lincsdata_t
init_lincs(FILE *fplog
, const gmx_mtop_t
*mtop
,
1407 int nflexcon_global
, const t_blocka
*at2con
,
1408 gmx_bool bPLINCS
, int nIter
, int nProjOrder
)
1410 struct gmx_lincsdata
*li
;
1412 gmx_moltype_t
*molt
;
1413 gmx_bool bMoreThanTwoSeq
;
1417 fprintf(fplog
, "\nInitializing%s LINear Constraint Solver\n",
1418 bPLINCS
? " Parallel" : "");
1424 gmx_mtop_ftype_count(mtop
, F_CONSTR
) +
1425 gmx_mtop_ftype_count(mtop
, F_CONSTRNC
);
1426 li
->ncg_flex
= nflexcon_global
;
1429 li
->nOrder
= nProjOrder
;
1431 li
->max_connect
= 0;
1432 for (mt
= 0; mt
< mtop
->nmoltype
; mt
++)
1436 molt
= &mtop
->moltype
[mt
];
1437 for (a
= 0; a
< molt
->atoms
.nr
; a
++)
1439 li
->max_connect
= std::max(li
->max_connect
,
1440 at2con
[mt
].index
[a
+ 1] - at2con
[mt
].index
[a
]);
1444 li
->ncg_triangle
= 0;
1445 bMoreThanTwoSeq
= FALSE
;
1446 for (mb
= 0; mb
< mtop
->nmolblock
; mb
++)
1448 molt
= &mtop
->moltype
[mtop
->molblock
[mb
].type
];
1451 mtop
->molblock
[mb
].nmol
*
1452 count_triangle_constraints(molt
->ilist
,
1453 &at2con
[mtop
->molblock
[mb
].type
]);
1455 if (!bMoreThanTwoSeq
&&
1456 more_than_two_sequential_constraints(molt
->ilist
, &at2con
[mtop
->molblock
[mb
].type
]))
1458 bMoreThanTwoSeq
= TRUE
;
1462 /* Check if we need to communicate not only before LINCS,
1463 * but also before each iteration.
1464 * The check for only two sequential constraints is only
1465 * useful for the common case of H-bond only constraints.
1466 * With more effort we could also make it useful for small
1467 * molecules with nr. sequential constraints <= nOrder-1.
1469 li
->bCommIter
= (bPLINCS
&& (li
->nOrder
< 1 || bMoreThanTwoSeq
));
1471 if (debug
&& bPLINCS
)
1473 fprintf(debug
, "PLINCS communication before each iteration: %d\n",
1477 /* LINCS can run on any number of threads.
1478 * Currently the number is fixed for the whole simulation,
1479 * but it could be set in set_lincs().
1480 * The current constraint to task assignment code can create independent
1481 * tasks only when not more than two constraints are connected sequentially.
1483 li
->ntask
= gmx_omp_nthreads_get(emntLINCS
);
1484 li
->bTaskDep
= (li
->ntask
> 1 && bMoreThanTwoSeq
);
1487 fprintf(debug
, "LINCS: using %d threads, tasks are %sdependent\n",
1488 li
->ntask
, li
->bTaskDep
? "" : "in");
1496 /* Allocate an extra elements for "task-overlap" constraints */
1497 snew(li
->task
, li
->ntask
+ 1);
1500 if (bPLINCS
|| li
->ncg_triangle
> 0)
1502 please_cite(fplog
, "Hess2008a");
1506 please_cite(fplog
, "Hess97a");
1511 fprintf(fplog
, "The number of constraints is %d\n", li
->ncg
);
1514 fprintf(fplog
, "There are inter charge-group constraints,\n"
1515 "will communicate selected coordinates each lincs iteration\n");
1517 if (li
->ncg_triangle
> 0)
1520 "%d constraints are involved in constraint triangles,\n"
1521 "will apply an additional matrix expansion of order %d for couplings\n"
1522 "between constraints inside triangles\n",
1523 li
->ncg_triangle
, li
->nOrder
);
1530 /* Sets up the work division over the threads */
1531 static void lincs_thread_setup(struct gmx_lincsdata
*li
, int natoms
)
1538 if (natoms
> li
->atf_nalloc
)
1540 li
->atf_nalloc
= over_alloc_large(natoms
);
1541 srenew(li
->atf
, li
->atf_nalloc
);
1545 /* Clear the atom flags */
1546 for (a
= 0; a
< natoms
; a
++)
1548 bitmask_clear(&atf
[a
]);
1551 if (li
->ntask
> BITMASK_SIZE
)
1553 gmx_fatal(FARGS
, "More than %d threads is not supported for LINCS.", BITMASK_SIZE
);
1556 for (th
= 0; th
< li
->ntask
; th
++)
1558 lincs_task_t
*li_task
;
1561 li_task
= &li
->task
[th
];
1563 /* For each atom set a flag for constraints from each */
1564 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1566 bitmask_set_bit(&atf
[li
->bla
[b
*2 ]], th
);
1567 bitmask_set_bit(&atf
[li
->bla
[b
*2 + 1]], th
);
1571 #pragma omp parallel for num_threads(li->ntask) schedule(static)
1572 for (th
= 0; th
< li
->ntask
; th
++)
1576 lincs_task_t
*li_task
;
1580 li_task
= &li
->task
[th
];
1582 if (li_task
->b1
- li_task
->b0
> li_task
->ind_nalloc
)
1584 li_task
->ind_nalloc
= over_alloc_large(li_task
->b1
-li_task
->b0
);
1585 srenew(li_task
->ind
, li_task
->ind_nalloc
);
1586 srenew(li_task
->ind_r
, li_task
->ind_nalloc
);
1589 bitmask_init_low_bits(&mask
, th
);
1592 li_task
->nind_r
= 0;
1593 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1595 /* We let the constraint with the lowest thread index
1596 * operate on atoms with constraints from multiple threads.
1598 if (bitmask_is_disjoint(atf
[li
->bla
[b
*2]], mask
) &&
1599 bitmask_is_disjoint(atf
[li
->bla
[b
*2+1]], mask
))
1601 /* Add the constraint to the local atom update index */
1602 li_task
->ind
[li_task
->nind
++] = b
;
1606 /* Add the constraint to the rest block */
1607 li_task
->ind_r
[li_task
->nind_r
++] = b
;
1611 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
1614 /* We need to copy all constraints which have not be assigned
1615 * to a thread to a separate list which will be handled by one thread.
1617 li_m
= &li
->task
[li
->ntask
];
1620 for (th
= 0; th
< li
->ntask
; th
++)
1622 lincs_task_t
*li_task
;
1625 li_task
= &li
->task
[th
];
1627 if (li_m
->nind
+ li_task
->nind_r
> li_m
->ind_nalloc
)
1629 li_m
->ind_nalloc
= over_alloc_large(li_m
->nind
+li_task
->nind_r
);
1630 srenew(li_m
->ind
, li_m
->ind_nalloc
);
1633 for (b
= 0; b
< li_task
->nind_r
; b
++)
1635 li_m
->ind
[li_m
->nind
++] = li_task
->ind_r
[b
];
1640 fprintf(debug
, "LINCS thread %d: %d constraints\n",
1647 fprintf(debug
, "LINCS thread r: %d constraints\n",
1652 /* There is no realloc with alignment, so here we make one for reals.
1653 * Note that this function does not preserve the contents of the memory.
1655 static void resize_real_aligned(real
**ptr
, int nelem
)
1657 sfree_aligned(*ptr
);
1658 snew_aligned(*ptr
, nelem
, align_bytes
);
1661 static void assign_constraint(struct gmx_lincsdata
*li
,
1662 int constraint_index
,
1664 real lenA
, real lenB
,
1665 const t_blocka
*at2con
)
1671 /* Make an mapping of local topology constraint index to LINCS index */
1672 li
->con_index
[constraint_index
] = con
;
1674 li
->bllen0
[con
] = lenA
;
1675 li
->ddist
[con
] = lenB
- lenA
;
1676 /* Set the length to the topology A length */
1677 li
->bllen
[con
] = lenA
;
1678 li
->bla
[2*con
] = a1
;
1679 li
->bla
[2*con
+1] = a2
;
1681 /* Make space in the constraint connection matrix for constraints
1682 * connected to both end of the current constraint.
1685 at2con
->index
[a1
+ 1] - at2con
->index
[a1
] - 1 +
1686 at2con
->index
[a2
+ 1] - at2con
->index
[a2
] - 1;
1688 li
->blnr
[con
+ 1] = li
->ncc
;
1690 /* Increase the constraint count */
1694 /* Check if constraint with topology index constraint_index is connected
1695 * to other constraints, and if so add those connected constraints to our task.
1697 static void check_assign_connected(struct gmx_lincsdata
*li
,
1698 const t_iatom
*iatom
,
1702 const t_blocka
*at2con
)
1704 /* Currently this function only supports constraint groups
1705 * in which all constraints share at least one atom
1706 * (e.g. H-bond constraints).
1707 * Check both ends of the current constraint for
1708 * connected constraints. We need to assign those
1713 for (end
= 0; end
< 2; end
++)
1717 a
= (end
== 0 ? a1
: a2
);
1719 for (k
= at2con
->index
[a
]; k
< at2con
->index
[a
+ 1]; k
++)
1724 /* Check if constraint cc has not yet been assigned */
1725 if (li
->con_index
[cc
] == -1)
1731 lenA
= idef
->iparams
[type
].constr
.dA
;
1732 lenB
= idef
->iparams
[type
].constr
.dB
;
1734 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
1736 assign_constraint(li
, cc
, iatom
[3*cc
+ 1], iatom
[3*cc
+ 2], lenA
, lenB
, at2con
);
1743 /* Check if constraint with topology index constraint_index is involved
1744 * in a constraint triangle, and if so add the other two constraints
1745 * in the triangle to our task.
1747 static void check_assign_triangle(struct gmx_lincsdata
*li
,
1748 const t_iatom
*iatom
,
1751 int constraint_index
,
1753 const t_blocka
*at2con
)
1755 int nca
, cc
[32], ca
[32], k
;
1756 int c_triangle
[2] = { -1, -1 };
1759 for (k
= at2con
->index
[a1
]; k
< at2con
->index
[a1
+ 1]; k
++)
1764 if (c
!= constraint_index
)
1768 aa1
= iatom
[c
*3 + 1];
1769 aa2
= iatom
[c
*3 + 2];
1785 for (k
= at2con
->index
[a2
]; k
< at2con
->index
[a2
+ 1]; k
++)
1790 if (c
!= constraint_index
)
1794 aa1
= iatom
[c
*3 + 1];
1795 aa2
= iatom
[c
*3 + 2];
1798 for (i
= 0; i
< nca
; i
++)
1802 c_triangle
[0] = cc
[i
];
1809 for (i
= 0; i
< nca
; i
++)
1813 c_triangle
[0] = cc
[i
];
1821 if (c_triangle
[0] >= 0)
1825 for (end
= 0; end
< 2; end
++)
1827 /* Check if constraint c_triangle[end] has not yet been assigned */
1828 if (li
->con_index
[c_triangle
[end
]] == -1)
1833 i
= c_triangle
[end
]*3;
1835 lenA
= idef
->iparams
[type
].constr
.dA
;
1836 lenB
= idef
->iparams
[type
].constr
.dB
;
1838 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
1840 assign_constraint(li
, c_triangle
[end
], iatom
[i
+ 1], iatom
[i
+ 2], lenA
, lenB
, at2con
);
1847 static void set_matrix_indices(struct gmx_lincsdata
*li
,
1848 const lincs_task_t
*li_task
,
1849 const t_blocka
*at2con
,
1850 gmx_bool bSortMatrix
)
1854 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1859 a2
= li
->bla
[b
*2 + 1];
1862 for (k
= at2con
->index
[a1
]; k
< at2con
->index
[a1
+ 1]; k
++)
1866 concon
= li
->con_index
[at2con
->a
[k
]];
1869 li
->blbnb
[i
++] = concon
;
1872 for (k
= at2con
->index
[a2
]; k
< at2con
->index
[a2
+ 1]; k
++)
1876 concon
= li
->con_index
[at2con
->a
[k
]];
1879 li
->blbnb
[i
++] = concon
;
1885 /* Order the blbnb matrix to optimize memory access */
1886 qsort(&(li
->blbnb
[li
->blnr
[b
]]), li
->blnr
[b
+ 1] - li
->blnr
[b
],
1887 sizeof(li
->blbnb
[0]), int_comp
);
1892 void set_lincs(const t_idef
*idef
,
1893 const t_mdatoms
*md
,
1896 struct gmx_lincsdata
*li
)
1898 int natoms
, nflexcon
;
1901 int i
, ncc_alloc_old
, ncon_tot
;
1906 /* Zero the thread index ranges.
1907 * Otherwise without local constraints we could return with old ranges.
1909 for (i
= 0; i
< li
->ntask
; i
++)
1913 li
->task
[i
].nind
= 0;
1917 li
->task
[li
->ntask
].nind
= 0;
1920 /* This is the local topology, so there are only F_CONSTR constraints */
1921 if (idef
->il
[F_CONSTR
].nr
== 0)
1923 /* There are no constraints,
1924 * we do not need to fill any data structures.
1931 fprintf(debug
, "Building the LINCS connectivity\n");
1934 if (DOMAINDECOMP(cr
))
1936 if (cr
->dd
->constraints
)
1940 dd_get_constraint_range(cr
->dd
, &start
, &natoms
);
1944 natoms
= cr
->dd
->nat_home
;
1949 natoms
= md
->homenr
;
1951 at2con
= make_at2con(0, natoms
, idef
->il
, idef
->iparams
, bDynamics
,
1954 ncon_tot
= idef
->il
[F_CONSTR
].nr
/3;
1956 /* Ensure we have enough padding for aligned loads for each thread */
1957 if (ncon_tot
+ li
->ntask
*simd_width
> li
->nc_alloc
|| li
->nc_alloc
== 0)
1959 li
->nc_alloc
= over_alloc_dd(ncon_tot
+ li
->ntask
*simd_width
);
1960 srenew(li
->con_index
, li
->nc_alloc
);
1961 resize_real_aligned(&li
->bllen0
, li
->nc_alloc
);
1962 resize_real_aligned(&li
->ddist
, li
->nc_alloc
);
1963 srenew(li
->bla
, 2*li
->nc_alloc
);
1964 resize_real_aligned(&li
->blc
, li
->nc_alloc
);
1965 resize_real_aligned(&li
->blc1
, li
->nc_alloc
);
1966 srenew(li
->blnr
, li
->nc_alloc
+ 1);
1967 resize_real_aligned(&li
->bllen
, li
->nc_alloc
);
1968 srenew(li
->tmpv
, li
->nc_alloc
);
1969 if (DOMAINDECOMP(cr
))
1971 srenew(li
->nlocat
, li
->nc_alloc
);
1973 resize_real_aligned(&li
->tmp1
, li
->nc_alloc
);
1974 resize_real_aligned(&li
->tmp2
, li
->nc_alloc
);
1975 resize_real_aligned(&li
->tmp3
, li
->nc_alloc
);
1976 resize_real_aligned(&li
->tmp4
, li
->nc_alloc
);
1977 resize_real_aligned(&li
->mlambda
, li
->nc_alloc
);
1980 iatom
= idef
->il
[F_CONSTR
].iatoms
;
1982 ncc_alloc_old
= li
->ncc_alloc
;
1983 li
->blnr
[0] = li
->ncc
;
1985 /* Assign the constraints for li->ntask LINCS tasks.
1986 * We target a uniform distribution of constraints over the tasks.
1987 * Note that when flexible constraints are present, but are removed here
1988 * (e.g. because we are doing EM) we get imbalance, but since that doesn't
1989 * happen during normal MD, that's ok.
1991 int ncon_assign
, ncon_target
, con
, th
;
1993 /* Determine the number of constraints we need to assign here */
1994 ncon_assign
= ncon_tot
;
1997 /* With energy minimization, flexible constraints are ignored
1998 * (and thus minimized, as they should be).
2000 ncon_assign
-= nflexcon
;
2003 /* Set the target constraint count per task to exactly uniform,
2004 * this might be overridden below.
2006 ncon_target
= (ncon_assign
+ li
->ntask
- 1)/li
->ntask
;
2008 /* Mark all constraints as unassigned by setting their index to -1 */
2009 for (con
= 0; con
< ncon_tot
; con
++)
2011 li
->con_index
[con
] = -1;
2015 for (th
= 0; th
< li
->ntask
; th
++)
2017 lincs_task_t
*li_task
;
2019 li_task
= &li
->task
[th
];
2021 #if GMX_SIMD_HAVE_REAL
2022 /* With indepedent tasks we likely have H-bond constraints or constraint
2023 * pairs. The connected constraints will be pulled into the task, so the
2024 * constraints per task will often exceed ncon_target.
2025 * Triangle constraints can also increase the count, but there are
2026 * relatively few of those, so we usually expect to get ncon_target.
2030 /* We round ncon_target to a multiple of GMX_SIMD_WIDTH,
2031 * since otherwise a lot of operations can be wasted.
2032 * There are several ways to round here, we choose the one
2033 * that alternates block sizes, which helps with Intel HT.
2035 ncon_target
= ((ncon_assign
*(th
+ 1))/li
->ntask
- li
->nc_real
+ GMX_SIMD_REAL_WIDTH
- 1) & ~(GMX_SIMD_REAL_WIDTH
- 1);
2037 #endif // GMX_SIMD==2 && GMX_SIMD_HAVE_REAL
2039 /* Continue filling the arrays where we left off with the previous task,
2040 * including padding for SIMD.
2042 li_task
->b0
= li
->nc
;
2044 while (con
< ncon_tot
&& li
->nc
- li_task
->b0
< ncon_target
)
2046 if (li
->con_index
[con
] == -1)
2051 type
= iatom
[3*con
];
2052 a1
= iatom
[3*con
+ 1];
2053 a2
= iatom
[3*con
+ 2];
2054 lenA
= idef
->iparams
[type
].constr
.dA
;
2055 lenB
= idef
->iparams
[type
].constr
.dB
;
2056 /* Skip the flexible constraints when not doing dynamics */
2057 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
2059 assign_constraint(li
, con
, a1
, a2
, lenA
, lenB
, &at2con
);
2061 if (li
->ntask
> 1 && !li
->bTaskDep
)
2063 /* We can generate independent tasks. Check if we
2064 * need to assign connected constraints to our task.
2066 check_assign_connected(li
, iatom
, idef
, bDynamics
,
2069 if (li
->ntask
> 1 && li
->ncg_triangle
> 0)
2071 /* Ensure constraints in one triangle are assigned
2074 check_assign_triangle(li
, iatom
, idef
, bDynamics
,
2075 con
, a1
, a2
, &at2con
);
2083 li_task
->b1
= li
->nc
;
2087 /* Copy the last atom pair indices and lengths for constraints
2088 * up to a multiple of simd_width, such that we can do all
2089 * SIMD operations without having to worry about end effects.
2093 li
->nc
= ((li_task
->b1
+ simd_width
- 1)/simd_width
)*simd_width
;
2094 last
= li_task
->b1
- 1;
2095 for (i
= li_task
->b1
; i
< li
->nc
; i
++)
2097 li
->bla
[i
*2 ] = li
->bla
[last
*2 ];
2098 li
->bla
[i
*2 + 1] = li
->bla
[last
*2 + 1];
2099 li
->bllen0
[i
] = li
->bllen0
[last
];
2100 li
->ddist
[i
] = li
->ddist
[last
];
2101 li
->bllen
[i
] = li
->bllen
[last
];
2102 li
->blnr
[i
+ 1] = li
->blnr
[last
+ 1];
2106 /* Keep track of how many constraints we assigned */
2107 li
->nc_real
+= li_task
->b1
- li_task
->b0
;
2111 fprintf(debug
, "LINCS task %d constraints %d - %d\n",
2112 th
, li_task
->b0
, li_task
->b1
);
2116 assert(li
->nc_real
== ncon_assign
);
2118 gmx_bool bSortMatrix
;
2120 /* Without DD we order the blbnb matrix to optimize memory access.
2121 * With DD the overhead of sorting is more than the gain during access.
2123 bSortMatrix
= !DOMAINDECOMP(cr
);
2125 if (li
->ncc
> li
->ncc_alloc
)
2127 li
->ncc_alloc
= over_alloc_small(li
->ncc
);
2128 srenew(li
->blbnb
, li
->ncc_alloc
);
2131 #pragma omp parallel for num_threads(li->ntask) schedule(static)
2132 for (th
= 0; th
< li
->ntask
; th
++)
2136 lincs_task_t
*li_task
;
2138 li_task
= &li
->task
[th
];
2140 if (li
->ncg_triangle
> 0 &&
2141 li_task
->b1
- li_task
->b0
> li_task
->tri_alloc
)
2143 /* This is allocating too much, but it is difficult to improve */
2144 li_task
->tri_alloc
= over_alloc_dd(li_task
->b1
- li_task
->b0
);
2145 srenew(li_task
->triangle
, li_task
->tri_alloc
);
2146 srenew(li_task
->tri_bits
, li_task
->tri_alloc
);
2149 set_matrix_indices(li
, li_task
, &at2con
, bSortMatrix
);
2151 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2154 done_blocka(&at2con
);
2156 if (cr
->dd
== nullptr)
2158 /* Since the matrix is static, we should free some memory */
2159 li
->ncc_alloc
= li
->ncc
;
2160 srenew(li
->blbnb
, li
->ncc_alloc
);
2163 if (li
->ncc_alloc
> ncc_alloc_old
)
2165 srenew(li
->blmf
, li
->ncc_alloc
);
2166 srenew(li
->blmf1
, li
->ncc_alloc
);
2167 srenew(li
->tmpncc
, li
->ncc_alloc
);
2170 if (DOMAINDECOMP(cr
) && dd_constraints_nlocalatoms(cr
->dd
) != nullptr)
2174 nlocat_dd
= dd_constraints_nlocalatoms(cr
->dd
);
2176 /* Convert nlocat from local topology to LINCS constraint indexing */
2177 for (con
= 0; con
< ncon_tot
; con
++)
2179 li
->nlocat
[li
->con_index
[con
]] = nlocat_dd
[con
];
2184 li
->nlocat
= nullptr;
2189 fprintf(debug
, "Number of constraints is %d, padded %d, couplings %d\n",
2190 li
->nc_real
, li
->nc
, li
->ncc
);
2195 lincs_thread_setup(li
, md
->nr
);
2198 set_lincs_matrix(li
, md
->invmass
, md
->lambda
);
2201 static void lincs_warning(FILE *fplog
,
2202 gmx_domdec_t
*dd
, rvec
*x
, rvec
*xprime
, t_pbc
*pbc
,
2203 int ncons
, int *bla
, real
*bllen
, real wangle
,
2204 int maxwarn
, int *warncount
)
2208 real wfac
, d0
, d1
, cosine
;
2211 wfac
= std::cos(DEG2RAD
*wangle
);
2213 sprintf(buf
, "bonds that rotated more than %g degrees:\n"
2214 " atom 1 atom 2 angle previous, current, constraint length\n",
2216 fprintf(stderr
, "%s", buf
);
2219 fprintf(fplog
, "%s", buf
);
2222 for (b
= 0; b
< ncons
; b
++)
2228 pbc_dx_aiuc(pbc
, x
[i
], x
[j
], v0
);
2229 pbc_dx_aiuc(pbc
, xprime
[i
], xprime
[j
], v1
);
2233 rvec_sub(x
[i
], x
[j
], v0
);
2234 rvec_sub(xprime
[i
], xprime
[j
], v1
);
2238 cosine
= iprod(v0
, v1
)/(d0
*d1
);
2241 sprintf(buf
, " %6d %6d %5.1f %8.4f %8.4f %8.4f\n",
2242 ddglatnr(dd
, i
), ddglatnr(dd
, j
),
2243 RAD2DEG
*std::acos(cosine
), d0
, d1
, bllen
[b
]);
2244 fprintf(stderr
, "%s", buf
);
2247 fprintf(fplog
, "%s", buf
);
2249 if (!std::isfinite(d1
))
2251 gmx_fatal(FARGS
, "Bond length not finite.");
2257 if (*warncount
> maxwarn
)
2259 too_many_constraint_warnings(econtLINCS
, *warncount
);
2263 static void cconerr(const struct gmx_lincsdata
*lincsd
,
2264 rvec
*x
, t_pbc
*pbc
,
2265 real
*ncons_loc
, real
*ssd
, real
*max
, int *imax
)
2267 const int *bla
, *nlocat
;
2270 int count
, im
, task
;
2273 bllen
= lincsd
->bllen
;
2274 nlocat
= lincsd
->nlocat
;
2280 for (task
= 0; task
< lincsd
->ntask
; task
++)
2284 for (b
= lincsd
->task
[task
].b0
; b
< lincsd
->task
[task
].b1
; b
++)
2291 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
2295 rvec_sub(x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
2298 len
= r2
*gmx::invsqrt(r2
);
2299 d
= std::abs(len
/bllen
[b
]-1);
2300 if (d
> ma
&& (nlocat
== nullptr || nlocat
[b
]))
2305 if (nlocat
== nullptr)
2312 ssd2
+= nlocat
[b
]*d
*d
;
2318 *ncons_loc
= (nlocat
? 0.5 : 1)*count
;
2319 *ssd
= (nlocat
? 0.5 : 1)*ssd2
;
2324 gmx_bool
constrain_lincs(FILE *fplog
, gmx_bool bLog
, gmx_bool bEner
,
2327 struct gmx_lincsdata
*lincsd
, t_mdatoms
*md
,
2329 rvec
*x
, rvec
*xprime
, rvec
*min_proj
,
2330 matrix box
, t_pbc
*pbc
,
2331 real lambda
, real
*dvdlambda
,
2332 real invdt
, rvec
*v
,
2333 gmx_bool bCalcVir
, tensor vir_r_m_dr
,
2336 int maxwarn
, int *warncount
)
2339 char buf
[STRLEN
], buf2
[22], buf3
[STRLEN
];
2341 real ncons_loc
, p_ssd
, p_max
= 0;
2343 gmx_bool bOK
, bWarn
;
2347 /* This boolean should be set by a flag passed to this routine.
2348 * We can also easily check if any constraint length is changed,
2349 * if not dH/dlambda=0 and we can also set the boolean to FALSE.
2351 bCalcDHDL
= (ir
->efep
!= efepNO
&& dvdlambda
!= nullptr);
2353 if (lincsd
->nc
== 0 && cr
->dd
== nullptr)
2357 lincsd
->rmsd_data
[0] = 0;
2358 lincsd
->rmsd_data
[1] = 0;
2364 if (econq
== econqCoord
)
2366 /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
2367 * also with efep!=fepNO.
2369 if (ir
->efep
!= efepNO
)
2371 if (md
->nMassPerturbed
&& lincsd
->matlam
!= md
->lambda
)
2373 set_lincs_matrix(lincsd
, md
->invmass
, md
->lambda
);
2376 for (i
= 0; i
< lincsd
->nc
; i
++)
2378 lincsd
->bllen
[i
] = lincsd
->bllen0
[i
] + lambda
*lincsd
->ddist
[i
];
2382 if (lincsd
->ncg_flex
)
2384 /* Set the flexible constraint lengths to the old lengths */
2387 for (i
= 0; i
< lincsd
->nc
; i
++)
2389 if (lincsd
->bllen
[i
] == 0)
2391 pbc_dx_aiuc(pbc
, x
[lincsd
->bla
[2*i
]], x
[lincsd
->bla
[2*i
+1]], dx
);
2392 lincsd
->bllen
[i
] = norm(dx
);
2398 for (i
= 0; i
< lincsd
->nc
; i
++)
2400 if (lincsd
->bllen
[i
] == 0)
2403 std::sqrt(distance2(x
[lincsd
->bla
[2*i
]],
2404 x
[lincsd
->bla
[2*i
+1]]));
2412 cconerr(lincsd
, xprime
, pbc
,
2413 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2416 /* This bWarn var can be updated by multiple threads
2417 * at the same time. But as we only need to detect
2418 * if a warning occurred or not, this is not an issue.
2422 /* The OpenMP parallel region of constrain_lincs for coords */
2423 #pragma omp parallel num_threads(lincsd->ntask)
2427 int th
= gmx_omp_get_thread_num();
2429 clear_mat(lincsd
->task
[th
].vir_r_m_dr
);
2431 do_lincs(x
, xprime
, box
, pbc
, lincsd
, th
,
2434 ir
->LincsWarnAngle
, &bWarn
,
2436 th
== 0 ? vir_r_m_dr
: lincsd
->task
[th
].vir_r_m_dr
);
2438 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2441 if (bLog
&& fplog
&& lincsd
->nc
> 0)
2443 fprintf(fplog
, " Rel. Constraint Deviation: RMS MAX between atoms\n");
2444 fprintf(fplog
, " Before LINCS %.6f %.6f %6d %6d\n",
2445 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2446 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2447 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2451 cconerr(lincsd
, xprime
, pbc
,
2452 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2453 lincsd
->rmsd_data
[0] = ncons_loc
;
2454 lincsd
->rmsd_data
[1] = p_ssd
;
2458 lincsd
->rmsd_data
[0] = 0;
2459 lincsd
->rmsd_data
[1] = 0;
2460 lincsd
->rmsd_data
[2] = 0;
2462 if (bLog
&& fplog
&& lincsd
->nc
> 0)
2465 " After LINCS %.6f %.6f %6d %6d\n\n",
2466 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2467 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2468 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2473 if (maxwarn
< INT_MAX
)
2475 cconerr(lincsd
, xprime
, pbc
,
2476 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2479 sprintf(buf3
, " in simulation %d", cr
->ms
->sim
);
2485 sprintf(buf
, "\nStep %s, time %g (ps) LINCS WARNING%s\n"
2486 "relative constraint deviation after LINCS:\n"
2487 "rms %.6f, max %.6f (between atoms %d and %d)\n",
2488 gmx_step_str(step
, buf2
), ir
->init_t
+step
*ir
->delta_t
,
2490 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2491 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2492 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2495 fprintf(fplog
, "%s", buf
);
2497 fprintf(stderr
, "%s", buf
);
2498 lincs_warning(fplog
, cr
->dd
, x
, xprime
, pbc
,
2499 lincsd
->nc
, lincsd
->bla
, lincsd
->bllen
,
2500 ir
->LincsWarnAngle
, maxwarn
, warncount
);
2502 bOK
= (p_max
< 0.5);
2505 if (lincsd
->ncg_flex
)
2507 for (i
= 0; (i
< lincsd
->nc
); i
++)
2509 if (lincsd
->bllen0
[i
] == 0 && lincsd
->ddist
[i
] == 0)
2511 lincsd
->bllen
[i
] = 0;
2518 /* The OpenMP parallel region of constrain_lincs for derivatives */
2519 #pragma omp parallel num_threads(lincsd->ntask)
2523 int th
= gmx_omp_get_thread_num();
2525 do_lincsp(x
, xprime
, min_proj
, pbc
, lincsd
, th
,
2526 md
->invmass
, econq
, bCalcDHDL
,
2527 bCalcVir
, th
== 0 ? vir_r_m_dr
: lincsd
->task
[th
].vir_r_m_dr
);
2529 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2535 /* Reduce the dH/dlambda contributions over the threads */
2540 for (th
= 0; th
< lincsd
->ntask
; th
++)
2542 dhdlambda
+= lincsd
->task
[th
].dhdlambda
;
2544 if (econq
== econqCoord
)
2546 /* dhdlambda contains dH/dlambda*dt^2, correct for this */
2547 /* TODO This should probably use invdt, so that sd integrator scaling works properly */
2548 dhdlambda
/= ir
->delta_t
*ir
->delta_t
;
2550 *dvdlambda
+= dhdlambda
;
2553 if (bCalcVir
&& lincsd
->ntask
> 1)
2555 for (i
= 1; i
< lincsd
->ntask
; i
++)
2557 m_add(vir_r_m_dr
, lincsd
->task
[i
].vir_r_m_dr
, vir_r_m_dr
);
2561 /* count assuming nit=1 */
2562 inc_nrnb(nrnb
, eNR_LINCS
, lincsd
->nc_real
);
2563 inc_nrnb(nrnb
, eNR_LINCSMAT
, (2+lincsd
->nOrder
)*lincsd
->ncc
);
2564 if (lincsd
->ntriangle
> 0)
2566 inc_nrnb(nrnb
, eNR_LINCSMAT
, lincsd
->nOrder
*lincsd
->ncc_triangle
);
2570 inc_nrnb(nrnb
, eNR_CONSTR_V
, lincsd
->nc_real
*2);
2574 inc_nrnb(nrnb
, eNR_CONSTR_VIR
, lincsd
->nc_real
);