2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief Defines LINCS code.
40 * \author Berk Hess <hess@kth.se>
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_mdlib
57 #include "gromacs/domdec/domdec.h"
58 #include "gromacs/domdec/domdec_struct.h"
59 #include "gromacs/gmxlib/nrnb.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdlib/constr.h"
64 #include "gromacs/mdlib/gmx_omp_nthreads.h"
65 #include "gromacs/mdlib/mdrun.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/inputrec.h"
68 #include "gromacs/mdtypes/md_enums.h"
69 #include "gromacs/mdtypes/mdatom.h"
70 #include "gromacs/pbcutil/pbc.h"
71 #include "gromacs/pbcutil/pbc-simd.h"
72 #include "gromacs/simd/simd.h"
73 #include "gromacs/simd/simd_math.h"
74 #include "gromacs/simd/vector_operations.h"
75 #include "gromacs/topology/block.h"
76 #include "gromacs/topology/mtop_util.h"
77 #include "gromacs/utility/basedefinitions.h"
78 #include "gromacs/utility/bitmask.h"
79 #include "gromacs/utility/cstringutil.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/gmxomp.h"
83 #include "gromacs/utility/pleasecite.h"
84 #include "gromacs/utility/smalloc.h"
86 using namespace gmx
; // TODO: Remove when this file is moved into gmx namespace
91 //! Unit of work within LINCS.
94 //! First constraint for this task.
96 //! b1-1 is the last constraint for this task.
98 //! The number of constraints in triangles.
100 //! The list of triangle constraints.
102 //! The bits tell if the matrix element should be used.
104 //! Allocation size of triangle and tri_bits.
106 //! Number of indices.
108 //! Constraint index for updating atom data.
110 //! Number of indices.
112 //! Constraint index for updating atom data.
114 //! Allocation size of ind and ind_r.
116 //! Temporary variable for virial calculation.
118 //! Temporary variable for lambda derivative.
122 /*! \brief Data for LINCS algorithm.
127 //! The global number of constraints.
129 //! The global number of flexible constraints.
131 //! The global number of constraints in triangles.
133 //! The number of iterations.
135 //! The order of the matrix expansion.
137 //! The maximum number of constraints connected to a single atom.
140 //! The number of real constraints.
142 //! The number of constraints including padding for SIMD.
144 //! The number we allocated memory for.
146 //! The number of constraint connections.
148 //! The number we allocated memory for.
150 //! The FE lambda value used for filling blc and blmf.
152 //! mapping from topology to LINCS constraints.
154 //! The reference distance in topology A.
156 //! The reference distance in top B - the r.d. in top A.
158 //! The atom pairs involved in the constraints.
160 //! 1/sqrt(invmass1 invmass2).
162 //! As blc, but with all masses 1.
164 //! Index into blbnb and blmf.
166 //! List of constraint connections.
168 //! The local number of constraints in triangles.
170 //! The number of constraint connections in triangles.
172 //! Communicate before each LINCS interation.
174 //! Matrix of mass factors for constraint connections.
176 //! As blmf, but with all masses 1.
178 //! The reference bond length.
180 //! The local atom count per constraint, can be NULL.
183 /*! \brief The number of tasks used for LINCS work.
185 * \todo This is mostly used to loop over \c task, which would
186 * be nicer to do with range-based for loops, but the thread
187 * index is used for constructing bit masks and organizing the
188 * virial output buffer, so other things need to change,
191 /*! \brief LINCS thread division */
193 //! Atom flags for thread parallelization.
195 //! Allocation size of atf
197 //! Are the LINCS tasks interdependent?
199 //! Are there triangle constraints that cross task borders?
201 //! Arrays for temporary storage in the LINCS algorithm.
210 //! The Lagrange multipliers times -1.
212 //! Storage for the constraint RMS relative deviation output.
216 /*! \brief Define simd_width for memory allocation used for SIMD code */
217 #if GMX_SIMD_HAVE_REAL
218 static const int simd_width
= GMX_SIMD_REAL_WIDTH
;
220 static const int simd_width
= 1;
223 /*! \brief Align to 128 bytes, consistent with the current implementation of
224 AlignedAllocator, which currently forces 128 byte alignment. */
225 static const int align_bytes
= 128;
227 real
*lincs_rmsd_data(Lincs
*lincsd
)
229 return lincsd
->rmsd_data
;
232 real
lincs_rmsd(const Lincs
*lincsd
)
234 if (lincsd
->rmsd_data
[0] > 0)
236 return std::sqrt(lincsd
->rmsd_data
[1]/lincsd
->rmsd_data
[0]);
244 /*! \brief Do a set of nrec LINCS matrix multiplications.
246 * This function will return with up to date thread-local
247 * constraint data, without an OpenMP barrier.
249 static void lincs_matrix_expand(const Lincs
*lincsd
,
252 real
*rhs1
, real
*rhs2
, real
*sol
)
254 int b0
, b1
, nrec
, rec
;
255 const int *blnr
= lincsd
->blnr
;
256 const int *blbnb
= lincsd
->blbnb
;
260 nrec
= lincsd
->nOrder
;
262 for (rec
= 0; rec
< nrec
; rec
++)
266 if (lincsd
->bTaskDep
)
270 for (b
= b0
; b
< b1
; b
++)
276 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
278 mvb
= mvb
+ blcc
[n
]*rhs1
[blbnb
[n
]];
281 sol
[b
] = sol
[b
] + mvb
;
289 } /* nrec*(ncons+2*nrtot) flops */
291 if (lincsd
->ntriangle
> 0)
293 /* Perform an extra nrec recursions for only the constraints
294 * involved in rigid triangles.
295 * In this way their accuracy should come close to those of the other
296 * constraints, since traingles of constraints can produce eigenvalues
297 * around 0.7, while the effective eigenvalue for bond constraints
298 * is around 0.4 (and 0.7*0.7=0.5).
301 if (lincsd
->bTaskDep
)
303 /* We need a barrier here, since other threads might still be
304 * reading the contents of rhs1 and/o rhs2.
305 * We could avoid this barrier by introducing two extra rhs
306 * arrays for the triangle constraints only.
311 /* Constraints involved in a triangle are ensured to be in the same
312 * LINCS task. This means no barriers are required during the extra
313 * iterations for the triangle constraints.
315 const int *triangle
= li_task
->triangle
;
316 const int *tri_bits
= li_task
->tri_bits
;
318 for (rec
= 0; rec
< nrec
; rec
++)
322 for (tb
= 0; tb
< li_task
->ntriangle
; tb
++)
324 int b
, bits
, nr0
, nr1
, n
;
332 for (n
= nr0
; n
< nr1
; n
++)
334 if (bits
& (1 << (n
- nr0
)))
336 mvb
= mvb
+ blcc
[n
]*rhs1
[blbnb
[n
]];
340 sol
[b
] = sol
[b
] + mvb
;
348 } /* nrec*(ntriangle + ncc_triangle*2) flops */
350 if (lincsd
->bTaskDepTri
)
352 /* The constraints triangles are decoupled from each other,
353 * but constraints in one triangle cross thread task borders.
354 * We could probably avoid this with more advanced setup code.
361 //! Update atomic coordinates when an index is not required.
362 static void lincs_update_atoms_noind(int ncons
, const int *bla
,
364 const real
*fac
, rvec
*r
,
369 real mvb
, im1
, im2
, tmp0
, tmp1
, tmp2
;
371 if (invmass
!= nullptr)
373 for (b
= 0; b
< ncons
; b
++)
389 } /* 16 ncons flops */
393 for (b
= 0; b
< ncons
; b
++)
411 //! Update atomic coordinates when an index is required.
412 static void lincs_update_atoms_ind(int ncons
, const int *ind
, const int *bla
,
414 const real
*fac
, rvec
*r
,
419 real mvb
, im1
, im2
, tmp0
, tmp1
, tmp2
;
421 if (invmass
!= nullptr)
423 for (bi
= 0; bi
< ncons
; bi
++)
440 } /* 16 ncons flops */
444 for (bi
= 0; bi
< ncons
; bi
++)
459 } /* 16 ncons flops */
463 //! Update coordinates for atoms.
464 static void lincs_update_atoms(Lincs
*li
, int th
,
466 const real
*fac
, rvec
*r
,
472 /* Single thread, we simply update for all constraints */
473 lincs_update_atoms_noind(li
->nc_real
,
474 li
->bla
, prefac
, fac
, r
, invmass
, x
);
478 /* Update the atom vector components for our thread local
479 * constraints that only access our local atom range.
480 * This can be done without a barrier.
482 lincs_update_atoms_ind(li
->task
[th
].nind
, li
->task
[th
].ind
,
483 li
->bla
, prefac
, fac
, r
, invmass
, x
);
485 if (li
->task
[li
->ntask
].nind
> 0)
487 /* Update the constraints that operate on atoms
488 * in multiple thread atom blocks on the master thread.
493 lincs_update_atoms_ind(li
->task
[li
->ntask
].nind
,
494 li
->task
[li
->ntask
].ind
,
495 li
->bla
, prefac
, fac
, r
, invmass
, x
);
501 #if GMX_SIMD_HAVE_REAL
502 /*! \brief Calculate the constraint distance vectors r to project on from x.
504 * Determine the right-hand side of the matrix equation using quantity f.
505 * This function only differs from calc_dr_x_xp_simd below in that
506 * no constraint length is subtracted and no PBC is used for f. */
507 static void gmx_simdcall
508 calc_dr_x_f_simd(int b0
,
511 const rvec
* gmx_restrict x
,
512 const rvec
* gmx_restrict f
,
513 const real
* gmx_restrict blc
,
514 const real
* pbc_simd
,
515 rvec
* gmx_restrict r
,
516 real
* gmx_restrict rhs
,
517 real
* gmx_restrict sol
)
519 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
521 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset2
[GMX_SIMD_REAL_WIDTH
];
523 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
528 for (int bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
530 SimdReal x0_S
, y0_S
, z0_S
;
531 SimdReal x1_S
, y1_S
, z1_S
;
532 SimdReal rx_S
, ry_S
, rz_S
, n2_S
, il_S
;
533 SimdReal fx_S
, fy_S
, fz_S
, ip_S
, rhs_S
;
534 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset0
[GMX_SIMD_REAL_WIDTH
];
535 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset1
[GMX_SIMD_REAL_WIDTH
];
537 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
539 offset0
[i
] = bla
[bs
*2 + i
*2];
540 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
543 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
544 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
549 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
551 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
552 il_S
= invsqrt(n2_S
);
558 transposeScatterStoreU
<3>(reinterpret_cast<real
*>(r
+ bs
), offset2
, rx_S
, ry_S
, rz_S
);
560 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(f
), offset0
, &x0_S
, &y0_S
, &z0_S
);
561 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(f
), offset1
, &x1_S
, &y1_S
, &z1_S
);
566 ip_S
= iprod(rx_S
, ry_S
, rz_S
, fx_S
, fy_S
, fz_S
);
568 rhs_S
= load
<SimdReal
>(blc
+ bs
) * ip_S
;
570 store(rhs
+ bs
, rhs_S
);
571 store(sol
+ bs
, rhs_S
);
574 #endif // GMX_SIMD_HAVE_REAL
576 /*! \brief LINCS projection, works on derivatives of the coordinates. */
577 static void do_lincsp(const rvec
*x
, rvec
*f
, rvec
*fp
, t_pbc
*pbc
,
578 Lincs
*lincsd
, int th
,
580 ConstraintVariable econq
, bool bCalcDHDL
,
581 bool bCalcVir
, tensor rmdf
)
584 int *bla
, *blnr
, *blbnb
;
586 real
*blc
, *blmf
, *blcc
, *rhs1
, *rhs2
, *sol
;
588 b0
= lincsd
->task
[th
].b0
;
589 b1
= lincsd
->task
[th
].b1
;
594 blbnb
= lincsd
->blbnb
;
595 if (econq
!= ConstraintVariable::Force
)
597 /* Use mass-weighted parameters */
603 /* Use non mass-weighted parameters */
605 blmf
= lincsd
->blmf1
;
607 blcc
= lincsd
->tmpncc
;
612 #if GMX_SIMD_HAVE_REAL
613 /* This SIMD code does the same as the plain-C code after the #else.
614 * The only difference is that we always call pbc code, as with SIMD
615 * the overhead of pbc computation (when not needed) is small.
617 alignas(GMX_SIMD_ALIGNMENT
) real pbc_simd
[9*GMX_SIMD_REAL_WIDTH
];
619 /* Convert the pbc struct for SIMD */
620 set_pbc_simd(pbc
, pbc_simd
);
622 /* Compute normalized x i-j vectors, store in r.
623 * Compute the inner product of r and xp i-j and store in rhs1.
625 calc_dr_x_f_simd(b0
, b1
, bla
, x
, f
, blc
,
629 #else // GMX_SIMD_HAVE_REAL
631 /* Compute normalized i-j vectors */
634 for (b
= b0
; b
< b1
; b
++)
638 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
644 for (b
= b0
; b
< b1
; b
++)
648 rvec_sub(x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
650 } /* 16 ncons flops */
653 for (b
= b0
; b
< b1
; b
++)
660 mvb
= blc
[b
]*(r
[b
][0]*(f
[i
][0] - f
[j
][0]) +
661 r
[b
][1]*(f
[i
][1] - f
[j
][1]) +
662 r
[b
][2]*(f
[i
][2] - f
[j
][2]));
668 #endif // GMX_SIMD_HAVE_REAL
670 if (lincsd
->bTaskDep
)
672 /* We need a barrier, since the matrix construction below
673 * can access entries in r of other threads.
678 /* Construct the (sparse) LINCS matrix */
679 for (b
= b0
; b
< b1
; b
++)
683 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
685 blcc
[n
] = blmf
[n
]*::iprod(r
[b
], r
[blbnb
[n
]]);
688 /* Together: 23*ncons + 6*nrtot flops */
690 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
691 /* nrec*(ncons+2*nrtot) flops */
693 if (econq
== ConstraintVariable::Deriv_FlexCon
)
695 /* We only want to constraint the flexible constraints,
696 * so we mask out the normal ones by setting sol to 0.
698 for (b
= b0
; b
< b1
; b
++)
700 if (!(lincsd
->bllen0
[b
] == 0 && lincsd
->ddist
[b
] == 0))
707 /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
708 for (b
= b0
; b
< b1
; b
++)
713 /* When constraining forces, we should not use mass weighting,
714 * so we pass invmass=NULL, which results in the use of 1 for all atoms.
716 lincs_update_atoms(lincsd
, th
, 1.0, sol
, r
,
717 (econq
!= ConstraintVariable::Force
) ? invmass
: nullptr, fp
);
724 for (b
= b0
; b
< b1
; b
++)
726 dhdlambda
-= sol
[b
]*lincsd
->ddist
[b
];
729 lincsd
->task
[th
].dhdlambda
= dhdlambda
;
734 /* Constraint virial,
735 * determines sum r_bond x delta f,
736 * where delta f is the constraint correction
737 * of the quantity that is being constrained.
739 for (b
= b0
; b
< b1
; b
++)
744 mvb
= lincsd
->bllen
[b
]*sol
[b
];
745 for (i
= 0; i
< DIM
; i
++)
748 for (j
= 0; j
< DIM
; j
++)
750 rmdf
[i
][j
] += tmp1
*r
[b
][j
];
753 } /* 23 ncons flops */
757 #if GMX_SIMD_HAVE_REAL
758 /*! \brief Calculate the constraint distance vectors r to project on from x.
760 * Determine the right-hand side of the matrix equation using coordinates xp. */
761 static void gmx_simdcall
762 calc_dr_x_xp_simd(int b0
,
765 const rvec
* gmx_restrict x
,
766 const rvec
* gmx_restrict xp
,
767 const real
* gmx_restrict bllen
,
768 const real
* gmx_restrict blc
,
769 const real
* pbc_simd
,
770 rvec
* gmx_restrict r
,
771 real
* gmx_restrict rhs
,
772 real
* gmx_restrict sol
)
774 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
775 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset2
[GMX_SIMD_REAL_WIDTH
];
777 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
782 for (int bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
784 SimdReal x0_S
, y0_S
, z0_S
;
785 SimdReal x1_S
, y1_S
, z1_S
;
786 SimdReal rx_S
, ry_S
, rz_S
, n2_S
, il_S
;
787 SimdReal rxp_S
, ryp_S
, rzp_S
, ip_S
, rhs_S
;
788 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset0
[GMX_SIMD_REAL_WIDTH
];
789 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset1
[GMX_SIMD_REAL_WIDTH
];
791 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
793 offset0
[i
] = bla
[bs
*2 + i
*2];
794 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
797 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
798 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
803 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
805 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
806 il_S
= invsqrt(n2_S
);
812 transposeScatterStoreU
<3>(reinterpret_cast<real
*>(r
+ bs
), offset2
, rx_S
, ry_S
, rz_S
);
814 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(xp
), offset0
, &x0_S
, &y0_S
, &z0_S
);
815 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(xp
), offset1
, &x1_S
, &y1_S
, &z1_S
);
820 pbc_correct_dx_simd(&rxp_S
, &ryp_S
, &rzp_S
, pbc_simd
);
822 ip_S
= iprod(rx_S
, ry_S
, rz_S
, rxp_S
, ryp_S
, rzp_S
);
824 rhs_S
= load
<SimdReal
>(blc
+ bs
) * (ip_S
- load
<SimdReal
>(bllen
+ bs
));
826 store(rhs
+ bs
, rhs_S
);
827 store(sol
+ bs
, rhs_S
);
830 #endif // GMX_SIMD_HAVE_REAL
832 /*! \brief Determine the distances and right-hand side for the next iteration. */
833 gmx_unused
static void calc_dist_iter(
837 const rvec
* gmx_restrict xp
,
838 const real
* gmx_restrict bllen
,
839 const real
* gmx_restrict blc
,
842 real
* gmx_restrict rhs
,
843 real
* gmx_restrict sol
,
848 for (b
= b0
; b
< b1
; b
++)
850 real len
, len2
, dlen2
, mvb
;
856 pbc_dx_aiuc(pbc
, xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
860 rvec_sub(xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
863 dlen2
= 2*len2
- ::norm2(dx
);
864 if (dlen2
< wfac
*len2
)
866 /* not race free - see detailed comment in caller */
871 mvb
= blc
[b
]*(len
- dlen2
*gmx::invsqrt(dlen2
));
879 } /* 20*ncons flops */
882 #if GMX_SIMD_HAVE_REAL
883 /*! \brief As calc_dist_iter(), but using SIMD intrinsics. */
884 static void gmx_simdcall
885 calc_dist_iter_simd(int b0
,
888 const rvec
* gmx_restrict x
,
889 const real
* gmx_restrict bllen
,
890 const real
* gmx_restrict blc
,
891 const real
* pbc_simd
,
893 real
* gmx_restrict rhs
,
894 real
* gmx_restrict sol
,
897 SimdReal
min_S(GMX_REAL_MIN
);
899 SimdReal
wfac_S(wfac
);
904 assert(b0
% GMX_SIMD_REAL_WIDTH
== 0);
906 /* Initialize all to FALSE */
907 warn_S
= (two_S
< setZero());
909 for (bs
= b0
; bs
< b1
; bs
+= GMX_SIMD_REAL_WIDTH
)
911 SimdReal x0_S
, y0_S
, z0_S
;
912 SimdReal x1_S
, y1_S
, z1_S
;
913 SimdReal rx_S
, ry_S
, rz_S
, n2_S
;
914 SimdReal len_S
, len2_S
, dlen2_S
, lc_S
, blc_S
;
915 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset0
[GMX_SIMD_REAL_WIDTH
];
916 alignas(GMX_SIMD_ALIGNMENT
) std::int32_t offset1
[GMX_SIMD_REAL_WIDTH
];
918 for (int i
= 0; i
< GMX_SIMD_REAL_WIDTH
; i
++)
920 offset0
[i
] = bla
[bs
*2 + i
*2];
921 offset1
[i
] = bla
[bs
*2 + i
*2 + 1];
924 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset0
, &x0_S
, &y0_S
, &z0_S
);
925 gatherLoadUTranspose
<3>(reinterpret_cast<const real
*>(x
), offset1
, &x1_S
, &y1_S
, &z1_S
);
930 pbc_correct_dx_simd(&rx_S
, &ry_S
, &rz_S
, pbc_simd
);
932 n2_S
= norm2(rx_S
, ry_S
, rz_S
);
934 len_S
= load
<SimdReal
>(bllen
+ bs
);
935 len2_S
= len_S
* len_S
;
937 dlen2_S
= fms(two_S
, len2_S
, n2_S
);
939 warn_S
= warn_S
|| (dlen2_S
< (wfac_S
* len2_S
));
941 /* Avoid 1/0 by taking the max with REAL_MIN.
942 * Note: when dlen2 is close to zero (90 degree constraint rotation),
943 * the accuracy of the algorithm is no longer relevant.
945 dlen2_S
= max(dlen2_S
, min_S
);
947 lc_S
= fnma(dlen2_S
, invsqrt(dlen2_S
), len_S
);
949 blc_S
= load
<SimdReal
>(blc
+ bs
);
953 store(rhs
+ bs
, lc_S
);
954 store(sol
+ bs
, lc_S
);
962 #endif // GMX_SIMD_HAVE_REAL
964 //! Implements LINCS constraining.
965 static void do_lincs(const rvec
*x
, rvec
*xp
, matrix box
, t_pbc
*pbc
,
966 Lincs
*lincsd
, int th
,
970 real wangle
, bool *bWarn
,
971 real invdt
, rvec
* gmx_restrict v
,
972 bool bCalcVir
, tensor vir_r_m_dr
)
974 int b0
, b1
, b
, i
, j
, n
, iter
;
975 int *bla
, *blnr
, *blbnb
;
977 real
*blc
, *blmf
, *bllen
, *blcc
, *rhs1
, *rhs2
, *sol
, *blc_sol
, *mlambda
;
980 b0
= lincsd
->task
[th
].b0
;
981 b1
= lincsd
->task
[th
].b1
;
986 blbnb
= lincsd
->blbnb
;
989 bllen
= lincsd
->bllen
;
990 blcc
= lincsd
->tmpncc
;
994 blc_sol
= lincsd
->tmp4
;
995 mlambda
= lincsd
->mlambda
;
996 nlocat
= lincsd
->nlocat
;
998 #if GMX_SIMD_HAVE_REAL
1000 /* This SIMD code does the same as the plain-C code after the #else.
1001 * The only difference is that we always call pbc code, as with SIMD
1002 * the overhead of pbc computation (when not needed) is small.
1004 alignas(GMX_SIMD_ALIGNMENT
) real pbc_simd
[9*GMX_SIMD_REAL_WIDTH
];
1006 /* Convert the pbc struct for SIMD */
1007 set_pbc_simd(pbc
, pbc_simd
);
1009 /* Compute normalized x i-j vectors, store in r.
1010 * Compute the inner product of r and xp i-j and store in rhs1.
1012 calc_dr_x_xp_simd(b0
, b1
, bla
, x
, xp
, bllen
, blc
,
1016 #else // GMX_SIMD_HAVE_REAL
1020 /* Compute normalized i-j vectors */
1021 for (b
= b0
; b
< b1
; b
++)
1026 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
1029 pbc_dx_aiuc(pbc
, xp
[bla
[2*b
]], xp
[bla
[2*b
+1]], dx
);
1030 mvb
= blc
[b
]*(::iprod(r
[b
], dx
) - bllen
[b
]);
1037 /* Compute normalized i-j vectors */
1038 for (b
= b0
; b
< b1
; b
++)
1040 real tmp0
, tmp1
, tmp2
, rlen
, mvb
;
1044 tmp0
= x
[i
][0] - x
[j
][0];
1045 tmp1
= x
[i
][1] - x
[j
][1];
1046 tmp2
= x
[i
][2] - x
[j
][2];
1047 rlen
= gmx::invsqrt(tmp0
*tmp0
+ tmp1
*tmp1
+ tmp2
*tmp2
);
1048 r
[b
][0] = rlen
*tmp0
;
1049 r
[b
][1] = rlen
*tmp1
;
1050 r
[b
][2] = rlen
*tmp2
;
1051 /* 16 ncons flops */
1055 mvb
= blc
[b
]*(r
[b
][0]*(xp
[i
][0] - xp
[j
][0]) +
1056 r
[b
][1]*(xp
[i
][1] - xp
[j
][1]) +
1057 r
[b
][2]*(xp
[i
][2] - xp
[j
][2]) - bllen
[b
]);
1062 /* Together: 26*ncons + 6*nrtot flops */
1065 #endif // GMX_SIMD_HAVE_REAL
1067 if (lincsd
->bTaskDep
)
1069 /* We need a barrier, since the matrix construction below
1070 * can access entries in r of other threads.
1075 /* Construct the (sparse) LINCS matrix */
1076 for (b
= b0
; b
< b1
; b
++)
1078 for (n
= blnr
[b
]; n
< blnr
[b
+1]; n
++)
1080 blcc
[n
] = blmf
[n
]*::iprod(r
[b
], r
[blbnb
[n
]]);
1083 /* Together: 26*ncons + 6*nrtot flops */
1085 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
1086 /* nrec*(ncons+2*nrtot) flops */
1088 #if GMX_SIMD_HAVE_REAL
1089 for (b
= b0
; b
< b1
; b
+= GMX_SIMD_REAL_WIDTH
)
1091 SimdReal t1
= load
<SimdReal
>(blc
+ b
);
1092 SimdReal t2
= load
<SimdReal
>(sol
+ b
);
1093 store(mlambda
+ b
, t1
* t2
);
1096 for (b
= b0
; b
< b1
; b
++)
1098 mlambda
[b
] = blc
[b
]*sol
[b
];
1100 #endif // GMX_SIMD_HAVE_REAL
1102 /* Update the coordinates */
1103 lincs_update_atoms(lincsd
, th
, 1.0, mlambda
, r
, invmass
, xp
);
1106 ******** Correction for centripetal effects ********
1111 wfac
= std::cos(DEG2RAD
*wangle
);
1114 for (iter
= 0; iter
< lincsd
->nIter
; iter
++)
1116 if ((lincsd
->bCommIter
&& DOMAINDECOMP(cr
) && cr
->dd
->constraints
))
1121 /* Communicate the corrected non-local coordinates */
1122 if (DOMAINDECOMP(cr
))
1124 dd_move_x_constraints(cr
->dd
, box
, xp
, nullptr, FALSE
);
1129 else if (lincsd
->bTaskDep
)
1134 #if GMX_SIMD_HAVE_REAL
1135 calc_dist_iter_simd(b0
, b1
, bla
, xp
, bllen
, blc
, pbc_simd
, wfac
,
1138 calc_dist_iter(b0
, b1
, bla
, xp
, bllen
, blc
, pbc
, wfac
,
1140 /* 20*ncons flops */
1141 #endif // GMX_SIMD_HAVE_REAL
1143 lincs_matrix_expand(lincsd
, &lincsd
->task
[th
], blcc
, rhs1
, rhs2
, sol
);
1144 /* nrec*(ncons+2*nrtot) flops */
1146 #if GMX_SIMD_HAVE_REAL
1147 for (b
= b0
; b
< b1
; b
+= GMX_SIMD_REAL_WIDTH
)
1149 SimdReal t1
= load
<SimdReal
>(blc
+ b
);
1150 SimdReal t2
= load
<SimdReal
>(sol
+ b
);
1151 SimdReal mvb
= t1
* t2
;
1152 store(blc_sol
+ b
, mvb
);
1153 store(mlambda
+ b
, load
<SimdReal
>(mlambda
+ b
) + mvb
);
1156 for (b
= b0
; b
< b1
; b
++)
1160 mvb
= blc
[b
]*sol
[b
];
1164 #endif // GMX_SIMD_HAVE_REAL
1166 /* Update the coordinates */
1167 lincs_update_atoms(lincsd
, th
, 1.0, blc_sol
, r
, invmass
, xp
);
1169 /* nit*ncons*(37+9*nrec) flops */
1173 /* Update the velocities */
1174 lincs_update_atoms(lincsd
, th
, invdt
, mlambda
, r
, invmass
, v
);
1175 /* 16 ncons flops */
1178 if (nlocat
!= nullptr && (bCalcDHDL
|| bCalcVir
))
1180 if (lincsd
->bTaskDep
)
1182 /* In lincs_update_atoms threads might cross-read mlambda */
1186 /* Only account for local atoms */
1187 for (b
= b0
; b
< b1
; b
++)
1189 mlambda
[b
] *= 0.5*nlocat
[b
];
1198 for (b
= b0
; b
< b1
; b
++)
1200 /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
1201 * later after the contributions are reduced over the threads.
1203 dhdl
-= lincsd
->mlambda
[b
]*lincsd
->ddist
[b
];
1205 lincsd
->task
[th
].dhdlambda
= dhdl
;
1210 /* Constraint virial */
1211 for (b
= b0
; b
< b1
; b
++)
1215 tmp0
= -bllen
[b
]*mlambda
[b
];
1216 for (i
= 0; i
< DIM
; i
++)
1218 tmp1
= tmp0
*r
[b
][i
];
1219 for (j
= 0; j
< DIM
; j
++)
1221 vir_r_m_dr
[i
][j
] -= tmp1
*r
[b
][j
];
1224 } /* 22 ncons flops */
1228 * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
1229 * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
1231 * (26+nrec)*ncons + (6+2*nrec)*nrtot
1232 * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
1234 * (63+nrec)*ncons + (6+4*nrec)*nrtot
1238 /*! \brief Sets the elements in the LINCS matrix for task task. */
1239 static void set_lincs_matrix_task(Lincs
*li
,
1241 const real
*invmass
,
1243 int *nCrossTaskTriangles
)
1247 /* Construct the coupling coefficient matrix blmf */
1248 li_task
->ntriangle
= 0;
1250 *nCrossTaskTriangles
= 0;
1251 for (i
= li_task
->b0
; i
< li_task
->b1
; i
++)
1256 a2
= li
->bla
[2*i
+1];
1257 for (n
= li
->blnr
[i
]; (n
< li
->blnr
[i
+1]); n
++)
1259 int k
, sign
, center
, end
;
1263 /* If we are using multiple, independent tasks for LINCS,
1264 * the calls to check_assign_connected should have
1265 * put all connected constraints in our task.
1267 assert(li
->bTaskDep
|| (k
>= li_task
->b0
&& k
< li_task
->b1
));
1269 if (a1
== li
->bla
[2*k
] || a2
== li
->bla
[2*k
+1])
1277 if (a1
== li
->bla
[2*k
] || a1
== li
->bla
[2*k
+1])
1287 li
->blmf
[n
] = sign
*invmass
[center
]*li
->blc
[i
]*li
->blc
[k
];
1288 li
->blmf1
[n
] = sign
*0.5;
1289 if (li
->ncg_triangle
> 0)
1293 /* Look for constraint triangles */
1294 for (nk
= li
->blnr
[k
]; (nk
< li
->blnr
[k
+1]); nk
++)
1297 if (kk
!= i
&& kk
!= k
&&
1298 (li
->bla
[2*kk
] == end
|| li
->bla
[2*kk
+1] == end
))
1300 /* Check if the constraints in this triangle actually
1301 * belong to a different task. We still assign them
1302 * here, since it's convenient for the triangle
1303 * iterations, but we then need an extra barrier.
1305 if (k
< li_task
->b0
|| k
>= li_task
->b1
||
1306 kk
< li_task
->b0
|| kk
>= li_task
->b1
)
1308 (*nCrossTaskTriangles
)++;
1311 if (li_task
->ntriangle
== 0 ||
1312 li_task
->triangle
[li_task
->ntriangle
- 1] < i
)
1314 /* Add this constraint to the triangle list */
1315 li_task
->triangle
[li_task
->ntriangle
] = i
;
1316 li_task
->tri_bits
[li_task
->ntriangle
] = 0;
1317 li_task
->ntriangle
++;
1318 if (li
->blnr
[i
+1] - li
->blnr
[i
] > static_cast<int>(sizeof(li_task
->tri_bits
[0])*8 - 1))
1320 gmx_fatal(FARGS
, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
1321 li
->blnr
[i
+1] - li
->blnr
[i
],
1322 sizeof(li_task
->tri_bits
[0])*8-1);
1325 li_task
->tri_bits
[li_task
->ntriangle
-1] |= (1 << (n
- li
->blnr
[i
]));
1334 /*! \brief Sets the elements in the LINCS matrix. */
1335 static void set_lincs_matrix(Lincs
*li
, real
*invmass
, real lambda
)
1338 const real invsqrt2
= 0.7071067811865475244;
1340 for (i
= 0; (i
< li
->nc
); i
++)
1345 a2
= li
->bla
[2*i
+1];
1346 li
->blc
[i
] = gmx::invsqrt(invmass
[a1
] + invmass
[a2
]);
1347 li
->blc1
[i
] = invsqrt2
;
1350 /* Construct the coupling coefficient matrix blmf */
1351 int th
, ntriangle
= 0, ncc_triangle
= 0, nCrossTaskTriangles
= 0;
1352 #pragma omp parallel for reduction(+: ntriangle, ncc_triangle, nCrossTaskTriangles) num_threads(li->ntask) schedule(static)
1353 for (th
= 0; th
< li
->ntask
; th
++)
1357 set_lincs_matrix_task(li
, &li
->task
[th
], invmass
,
1358 &ncc_triangle
, &nCrossTaskTriangles
);
1359 ntriangle
= li
->task
[th
].ntriangle
;
1361 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
1363 li
->ntriangle
= ntriangle
;
1364 li
->ncc_triangle
= ncc_triangle
;
1365 li
->bTaskDepTri
= (nCrossTaskTriangles
> 0);
1369 fprintf(debug
, "The %d constraints participate in %d triangles\n",
1370 li
->nc
, li
->ntriangle
);
1371 fprintf(debug
, "There are %d constraint couplings, of which %d in triangles\n",
1372 li
->ncc
, li
->ncc_triangle
);
1373 if (li
->ntriangle
> 0 && li
->ntask
> 1)
1375 fprintf(debug
, "%d constraint triangles contain constraints assigned to different tasks\n",
1376 nCrossTaskTriangles
);
1381 * so we know with which lambda value the masses have been set.
1383 li
->matlam
= lambda
;
1386 //! Finds all triangles of atoms that share constraints to a central atom.
1387 static int count_triangle_constraints(const t_ilist
*ilist
,
1388 const t_blocka
*at2con
)
1390 int ncon1
, ncon_tot
;
1391 int c0
, a00
, a01
, n1
, c1
, a10
, a11
, ac1
, n2
, c2
, a20
, a21
;
1394 t_iatom
*ia1
, *ia2
, *iap
;
1396 ncon1
= ilist
[F_CONSTR
].nr
/3;
1397 ncon_tot
= ncon1
+ ilist
[F_CONSTRNC
].nr
/3;
1399 ia1
= ilist
[F_CONSTR
].iatoms
;
1400 ia2
= ilist
[F_CONSTRNC
].iatoms
;
1403 for (c0
= 0; c0
< ncon_tot
; c0
++)
1406 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c0
);
1409 for (n1
= at2con
->index
[a01
]; n1
< at2con
->index
[a01
+1]; n1
++)
1414 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c1
);
1425 for (n2
= at2con
->index
[ac1
]; n2
< at2con
->index
[ac1
+1]; n2
++)
1428 if (c2
!= c0
&& c2
!= c1
)
1430 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c2
);
1433 if (a20
== a00
|| a21
== a00
)
1447 return ncon_triangle
;
1450 //! Finds sequences of sequential constraints.
1451 static bool more_than_two_sequential_constraints(const t_ilist
*ilist
,
1452 const t_blocka
*at2con
)
1454 t_iatom
*ia1
, *ia2
, *iap
;
1455 int ncon1
, ncon_tot
, c
;
1457 bool bMoreThanTwoSequentialConstraints
;
1459 ncon1
= ilist
[F_CONSTR
].nr
/3;
1460 ncon_tot
= ncon1
+ ilist
[F_CONSTRNC
].nr
/3;
1462 ia1
= ilist
[F_CONSTR
].iatoms
;
1463 ia2
= ilist
[F_CONSTRNC
].iatoms
;
1465 bMoreThanTwoSequentialConstraints
= FALSE
;
1466 for (c
= 0; c
< ncon_tot
&& !bMoreThanTwoSequentialConstraints
; c
++)
1468 iap
= constr_iatomptr(ncon1
, ia1
, ia2
, c
);
1471 /* Check if this constraint has constraints connected at both atoms */
1472 if (at2con
->index
[a1
+1] - at2con
->index
[a1
] > 1 &&
1473 at2con
->index
[a2
+1] - at2con
->index
[a2
] > 1)
1475 bMoreThanTwoSequentialConstraints
= TRUE
;
1479 return bMoreThanTwoSequentialConstraints
;
1482 //! Sorting helper function to compare two integers.
1483 static int int_comp(const void *a
, const void *b
)
1485 return (*(int *)a
) - (*(int *)b
);
1488 Lincs
*init_lincs(FILE *fplog
, const gmx_mtop_t
&mtop
,
1489 int nflexcon_global
, const t_blocka
*at2con
,
1490 bool bPLINCS
, int nIter
, int nProjOrder
)
1493 bool bMoreThanTwoSeq
;
1497 fprintf(fplog
, "\nInitializing%s LINear Constraint Solver\n",
1498 bPLINCS
? " Parallel" : "");
1504 gmx_mtop_ftype_count(mtop
, F_CONSTR
) +
1505 gmx_mtop_ftype_count(mtop
, F_CONSTRNC
);
1506 li
->ncg_flex
= nflexcon_global
;
1509 li
->nOrder
= nProjOrder
;
1511 li
->max_connect
= 0;
1512 for (size_t mt
= 0; mt
< mtop
.moltype
.size(); mt
++)
1514 for (int a
= 0; a
< mtop
.moltype
[mt
].atoms
.nr
; a
++)
1516 li
->max_connect
= std::max(li
->max_connect
,
1517 at2con
[mt
].index
[a
+ 1] - at2con
[mt
].index
[a
]);
1521 li
->ncg_triangle
= 0;
1522 bMoreThanTwoSeq
= FALSE
;
1523 for (const gmx_molblock_t
&molb
: mtop
.molblock
)
1525 const gmx_moltype_t
&molt
= mtop
.moltype
[molb
.type
];
1529 count_triangle_constraints(molt
.ilist
, &at2con
[molb
.type
]);
1531 if (!bMoreThanTwoSeq
&&
1532 more_than_two_sequential_constraints(molt
.ilist
, &at2con
[molb
.type
]))
1534 bMoreThanTwoSeq
= TRUE
;
1538 /* Check if we need to communicate not only before LINCS,
1539 * but also before each iteration.
1540 * The check for only two sequential constraints is only
1541 * useful for the common case of H-bond only constraints.
1542 * With more effort we could also make it useful for small
1543 * molecules with nr. sequential constraints <= nOrder-1.
1545 li
->bCommIter
= (bPLINCS
&& (li
->nOrder
< 1 || bMoreThanTwoSeq
));
1547 if (debug
&& bPLINCS
)
1549 fprintf(debug
, "PLINCS communication before each iteration: %d\n",
1553 /* LINCS can run on any number of threads.
1554 * Currently the number is fixed for the whole simulation,
1555 * but it could be set in set_lincs().
1556 * The current constraint to task assignment code can create independent
1557 * tasks only when not more than two constraints are connected sequentially.
1559 li
->ntask
= gmx_omp_nthreads_get(emntLINCS
);
1560 li
->bTaskDep
= (li
->ntask
> 1 && bMoreThanTwoSeq
);
1563 fprintf(debug
, "LINCS: using %d threads, tasks are %sdependent\n",
1564 li
->ntask
, li
->bTaskDep
? "" : "in");
1572 /* Allocate an extra elements for "task-overlap" constraints */
1573 snew(li
->task
, li
->ntask
+ 1);
1576 if (bPLINCS
|| li
->ncg_triangle
> 0)
1578 please_cite(fplog
, "Hess2008a");
1582 please_cite(fplog
, "Hess97a");
1587 fprintf(fplog
, "The number of constraints is %d\n", li
->ncg
);
1590 fprintf(fplog
, "There are inter charge-group constraints,\n"
1591 "will communicate selected coordinates each lincs iteration\n");
1593 if (li
->ncg_triangle
> 0)
1596 "%d constraints are involved in constraint triangles,\n"
1597 "will apply an additional matrix expansion of order %d for couplings\n"
1598 "between constraints inside triangles\n",
1599 li
->ncg_triangle
, li
->nOrder
);
1606 /*! \brief Sets up the work division over the threads. */
1607 static void lincs_thread_setup(Lincs
*li
, int natoms
)
1614 if (natoms
> li
->atf_nalloc
)
1616 li
->atf_nalloc
= over_alloc_large(natoms
);
1617 srenew(li
->atf
, li
->atf_nalloc
);
1621 /* Clear the atom flags */
1622 for (a
= 0; a
< natoms
; a
++)
1624 bitmask_clear(&atf
[a
]);
1627 if (li
->ntask
> BITMASK_SIZE
)
1629 gmx_fatal(FARGS
, "More than %d threads is not supported for LINCS.", BITMASK_SIZE
);
1632 for (th
= 0; th
< li
->ntask
; th
++)
1637 li_task
= &li
->task
[th
];
1639 /* For each atom set a flag for constraints from each */
1640 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1642 bitmask_set_bit(&atf
[li
->bla
[b
*2 ]], th
);
1643 bitmask_set_bit(&atf
[li
->bla
[b
*2 + 1]], th
);
1647 #pragma omp parallel for num_threads(li->ntask) schedule(static)
1648 for (th
= 0; th
< li
->ntask
; th
++)
1656 li_task
= &li
->task
[th
];
1658 if (li_task
->b1
- li_task
->b0
> li_task
->ind_nalloc
)
1660 li_task
->ind_nalloc
= over_alloc_large(li_task
->b1
-li_task
->b0
);
1661 srenew(li_task
->ind
, li_task
->ind_nalloc
);
1662 srenew(li_task
->ind_r
, li_task
->ind_nalloc
);
1665 bitmask_init_low_bits(&mask
, th
);
1668 li_task
->nind_r
= 0;
1669 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1671 /* We let the constraint with the lowest thread index
1672 * operate on atoms with constraints from multiple threads.
1674 if (bitmask_is_disjoint(atf
[li
->bla
[b
*2]], mask
) &&
1675 bitmask_is_disjoint(atf
[li
->bla
[b
*2+1]], mask
))
1677 /* Add the constraint to the local atom update index */
1678 li_task
->ind
[li_task
->nind
++] = b
;
1682 /* Add the constraint to the rest block */
1683 li_task
->ind_r
[li_task
->nind_r
++] = b
;
1687 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
1690 /* We need to copy all constraints which have not be assigned
1691 * to a thread to a separate list which will be handled by one thread.
1693 li_m
= &li
->task
[li
->ntask
];
1696 for (th
= 0; th
< li
->ntask
; th
++)
1701 li_task
= &li
->task
[th
];
1703 if (li_m
->nind
+ li_task
->nind_r
> li_m
->ind_nalloc
)
1705 li_m
->ind_nalloc
= over_alloc_large(li_m
->nind
+li_task
->nind_r
);
1706 srenew(li_m
->ind
, li_m
->ind_nalloc
);
1709 for (b
= 0; b
< li_task
->nind_r
; b
++)
1711 li_m
->ind
[li_m
->nind
++] = li_task
->ind_r
[b
];
1716 fprintf(debug
, "LINCS thread %d: %d constraints\n",
1723 fprintf(debug
, "LINCS thread r: %d constraints\n",
1728 /*! \brief There is no realloc with alignment, so here we make one for reals.
1729 * Note that this function does not preserve the contents of the memory.
1731 static void resize_real_aligned(real
**ptr
, int nelem
)
1733 sfree_aligned(*ptr
);
1734 snew_aligned(*ptr
, nelem
, align_bytes
);
1737 //! Assign a constraint.
1738 static void assign_constraint(Lincs
*li
,
1739 int constraint_index
,
1741 real lenA
, real lenB
,
1742 const t_blocka
*at2con
)
1748 /* Make an mapping of local topology constraint index to LINCS index */
1749 li
->con_index
[constraint_index
] = con
;
1751 li
->bllen0
[con
] = lenA
;
1752 li
->ddist
[con
] = lenB
- lenA
;
1753 /* Set the length to the topology A length */
1754 li
->bllen
[con
] = lenA
;
1755 li
->bla
[2*con
] = a1
;
1756 li
->bla
[2*con
+1] = a2
;
1758 /* Make space in the constraint connection matrix for constraints
1759 * connected to both end of the current constraint.
1762 at2con
->index
[a1
+ 1] - at2con
->index
[a1
] - 1 +
1763 at2con
->index
[a2
+ 1] - at2con
->index
[a2
] - 1;
1765 li
->blnr
[con
+ 1] = li
->ncc
;
1767 /* Increase the constraint count */
1771 /*! \brief Check if constraint with topology index constraint_index is connected
1772 * to other constraints, and if so add those connected constraints to our task. */
1773 static void check_assign_connected(Lincs
*li
,
1774 const t_iatom
*iatom
,
1778 const t_blocka
*at2con
)
1780 /* Currently this function only supports constraint groups
1781 * in which all constraints share at least one atom
1782 * (e.g. H-bond constraints).
1783 * Check both ends of the current constraint for
1784 * connected constraints. We need to assign those
1789 for (end
= 0; end
< 2; end
++)
1793 a
= (end
== 0 ? a1
: a2
);
1795 for (k
= at2con
->index
[a
]; k
< at2con
->index
[a
+ 1]; k
++)
1800 /* Check if constraint cc has not yet been assigned */
1801 if (li
->con_index
[cc
] == -1)
1807 lenA
= idef
.iparams
[type
].constr
.dA
;
1808 lenB
= idef
.iparams
[type
].constr
.dB
;
1810 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
1812 assign_constraint(li
, cc
, iatom
[3*cc
+ 1], iatom
[3*cc
+ 2], lenA
, lenB
, at2con
);
1819 /*! \brief Check if constraint with topology index constraint_index is involved
1820 * in a constraint triangle, and if so add the other two constraints
1821 * in the triangle to our task. */
1822 static void check_assign_triangle(Lincs
*li
,
1823 const t_iatom
*iatom
,
1826 int constraint_index
,
1828 const t_blocka
*at2con
)
1830 int nca
, cc
[32], ca
[32], k
;
1831 int c_triangle
[2] = { -1, -1 };
1834 for (k
= at2con
->index
[a1
]; k
< at2con
->index
[a1
+ 1]; k
++)
1839 if (c
!= constraint_index
)
1843 aa1
= iatom
[c
*3 + 1];
1844 aa2
= iatom
[c
*3 + 2];
1860 for (k
= at2con
->index
[a2
]; k
< at2con
->index
[a2
+ 1]; k
++)
1865 if (c
!= constraint_index
)
1869 aa1
= iatom
[c
*3 + 1];
1870 aa2
= iatom
[c
*3 + 2];
1873 for (i
= 0; i
< nca
; i
++)
1877 c_triangle
[0] = cc
[i
];
1884 for (i
= 0; i
< nca
; i
++)
1888 c_triangle
[0] = cc
[i
];
1896 if (c_triangle
[0] >= 0)
1900 for (end
= 0; end
< 2; end
++)
1902 /* Check if constraint c_triangle[end] has not yet been assigned */
1903 if (li
->con_index
[c_triangle
[end
]] == -1)
1908 i
= c_triangle
[end
]*3;
1910 lenA
= idef
.iparams
[type
].constr
.dA
;
1911 lenB
= idef
.iparams
[type
].constr
.dB
;
1913 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
1915 assign_constraint(li
, c_triangle
[end
], iatom
[i
+ 1], iatom
[i
+ 2], lenA
, lenB
, at2con
);
1922 //! Sets matrix indices.
1923 static void set_matrix_indices(Lincs
*li
,
1924 const Task
*li_task
,
1925 const t_blocka
*at2con
,
1930 for (b
= li_task
->b0
; b
< li_task
->b1
; b
++)
1935 a2
= li
->bla
[b
*2 + 1];
1938 for (k
= at2con
->index
[a1
]; k
< at2con
->index
[a1
+ 1]; k
++)
1942 concon
= li
->con_index
[at2con
->a
[k
]];
1945 li
->blbnb
[i
++] = concon
;
1948 for (k
= at2con
->index
[a2
]; k
< at2con
->index
[a2
+ 1]; k
++)
1952 concon
= li
->con_index
[at2con
->a
[k
]];
1955 li
->blbnb
[i
++] = concon
;
1961 /* Order the blbnb matrix to optimize memory access */
1962 qsort(&(li
->blbnb
[li
->blnr
[b
]]), li
->blnr
[b
+ 1] - li
->blnr
[b
],
1963 sizeof(li
->blbnb
[0]), int_comp
);
1968 void set_lincs(const t_idef
&idef
,
1969 const t_mdatoms
&md
,
1971 const t_commrec
*cr
,
1977 int i
, ncc_alloc_old
, ncon_tot
;
1982 /* Zero the thread index ranges.
1983 * Otherwise without local constraints we could return with old ranges.
1985 for (i
= 0; i
< li
->ntask
; i
++)
1989 li
->task
[i
].nind
= 0;
1993 li
->task
[li
->ntask
].nind
= 0;
1996 /* This is the local topology, so there are only F_CONSTR constraints */
1997 if (idef
.il
[F_CONSTR
].nr
== 0)
1999 /* There are no constraints,
2000 * we do not need to fill any data structures.
2007 fprintf(debug
, "Building the LINCS connectivity\n");
2010 if (DOMAINDECOMP(cr
))
2012 if (cr
->dd
->constraints
)
2016 dd_get_constraint_range(cr
->dd
, &start
, &natoms
);
2020 natoms
= cr
->dd
->nat_home
;
2028 at2con
= make_at2con(natoms
, idef
.il
, idef
.iparams
,
2029 flexibleConstraintTreatment(bDynamics
));
2031 ncon_tot
= idef
.il
[F_CONSTR
].nr
/3;
2033 /* Ensure we have enough padding for aligned loads for each thread */
2034 if (ncon_tot
+ li
->ntask
*simd_width
> li
->nc_alloc
|| li
->nc_alloc
== 0)
2036 li
->nc_alloc
= over_alloc_dd(ncon_tot
+ li
->ntask
*simd_width
);
2037 srenew(li
->con_index
, li
->nc_alloc
);
2038 resize_real_aligned(&li
->bllen0
, li
->nc_alloc
);
2039 resize_real_aligned(&li
->ddist
, li
->nc_alloc
);
2040 srenew(li
->bla
, 2*li
->nc_alloc
);
2041 resize_real_aligned(&li
->blc
, li
->nc_alloc
);
2042 resize_real_aligned(&li
->blc1
, li
->nc_alloc
);
2043 srenew(li
->blnr
, li
->nc_alloc
+ 1);
2044 resize_real_aligned(&li
->bllen
, li
->nc_alloc
);
2045 srenew(li
->tmpv
, li
->nc_alloc
);
2046 if (DOMAINDECOMP(cr
))
2048 srenew(li
->nlocat
, li
->nc_alloc
);
2050 resize_real_aligned(&li
->tmp1
, li
->nc_alloc
);
2051 resize_real_aligned(&li
->tmp2
, li
->nc_alloc
);
2052 resize_real_aligned(&li
->tmp3
, li
->nc_alloc
);
2053 resize_real_aligned(&li
->tmp4
, li
->nc_alloc
);
2054 resize_real_aligned(&li
->mlambda
, li
->nc_alloc
);
2057 iatom
= idef
.il
[F_CONSTR
].iatoms
;
2059 ncc_alloc_old
= li
->ncc_alloc
;
2060 li
->blnr
[0] = li
->ncc
;
2062 /* Assign the constraints for li->ntask LINCS tasks.
2063 * We target a uniform distribution of constraints over the tasks.
2064 * Note that when flexible constraints are present, but are removed here
2065 * (e.g. because we are doing EM) we get imbalance, but since that doesn't
2066 * happen during normal MD, that's ok.
2068 int ncon_assign
, ncon_target
, con
, th
;
2070 /* Determine the number of constraints we need to assign here */
2071 ncon_assign
= ncon_tot
;
2074 /* With energy minimization, flexible constraints are ignored
2075 * (and thus minimized, as they should be).
2077 ncon_assign
-= countFlexibleConstraints(idef
.il
, idef
.iparams
);
2080 /* Set the target constraint count per task to exactly uniform,
2081 * this might be overridden below.
2083 ncon_target
= (ncon_assign
+ li
->ntask
- 1)/li
->ntask
;
2085 /* Mark all constraints as unassigned by setting their index to -1 */
2086 for (con
= 0; con
< ncon_tot
; con
++)
2088 li
->con_index
[con
] = -1;
2092 for (th
= 0; th
< li
->ntask
; th
++)
2096 li_task
= &li
->task
[th
];
2098 #if GMX_SIMD_HAVE_REAL
2099 /* With indepedent tasks we likely have H-bond constraints or constraint
2100 * pairs. The connected constraints will be pulled into the task, so the
2101 * constraints per task will often exceed ncon_target.
2102 * Triangle constraints can also increase the count, but there are
2103 * relatively few of those, so we usually expect to get ncon_target.
2107 /* We round ncon_target to a multiple of GMX_SIMD_WIDTH,
2108 * since otherwise a lot of operations can be wasted.
2109 * There are several ways to round here, we choose the one
2110 * that alternates block sizes, which helps with Intel HT.
2112 ncon_target
= ((ncon_assign
*(th
+ 1))/li
->ntask
- li
->nc_real
+ GMX_SIMD_REAL_WIDTH
- 1) & ~(GMX_SIMD_REAL_WIDTH
- 1);
2114 #endif // GMX_SIMD==2 && GMX_SIMD_HAVE_REAL
2116 /* Continue filling the arrays where we left off with the previous task,
2117 * including padding for SIMD.
2119 li_task
->b0
= li
->nc
;
2121 while (con
< ncon_tot
&& li
->nc
- li_task
->b0
< ncon_target
)
2123 if (li
->con_index
[con
] == -1)
2128 type
= iatom
[3*con
];
2129 a1
= iatom
[3*con
+ 1];
2130 a2
= iatom
[3*con
+ 2];
2131 lenA
= idef
.iparams
[type
].constr
.dA
;
2132 lenB
= idef
.iparams
[type
].constr
.dB
;
2133 /* Skip the flexible constraints when not doing dynamics */
2134 if (bDynamics
|| lenA
!= 0 || lenB
!= 0)
2136 assign_constraint(li
, con
, a1
, a2
, lenA
, lenB
, &at2con
);
2138 if (li
->ntask
> 1 && !li
->bTaskDep
)
2140 /* We can generate independent tasks. Check if we
2141 * need to assign connected constraints to our task.
2143 check_assign_connected(li
, iatom
, idef
, bDynamics
,
2146 if (li
->ntask
> 1 && li
->ncg_triangle
> 0)
2148 /* Ensure constraints in one triangle are assigned
2151 check_assign_triangle(li
, iatom
, idef
, bDynamics
,
2152 con
, a1
, a2
, &at2con
);
2160 li_task
->b1
= li
->nc
;
2164 /* Copy the last atom pair indices and lengths for constraints
2165 * up to a multiple of simd_width, such that we can do all
2166 * SIMD operations without having to worry about end effects.
2170 li
->nc
= ((li_task
->b1
+ simd_width
- 1)/simd_width
)*simd_width
;
2171 last
= li_task
->b1
- 1;
2172 for (i
= li_task
->b1
; i
< li
->nc
; i
++)
2174 li
->bla
[i
*2 ] = li
->bla
[last
*2 ];
2175 li
->bla
[i
*2 + 1] = li
->bla
[last
*2 + 1];
2176 li
->bllen0
[i
] = li
->bllen0
[last
];
2177 li
->ddist
[i
] = li
->ddist
[last
];
2178 li
->bllen
[i
] = li
->bllen
[last
];
2179 li
->blnr
[i
+ 1] = li
->blnr
[last
+ 1];
2183 /* Keep track of how many constraints we assigned */
2184 li
->nc_real
+= li_task
->b1
- li_task
->b0
;
2188 fprintf(debug
, "LINCS task %d constraints %d - %d\n",
2189 th
, li_task
->b0
, li_task
->b1
);
2193 assert(li
->nc_real
== ncon_assign
);
2197 /* Without DD we order the blbnb matrix to optimize memory access.
2198 * With DD the overhead of sorting is more than the gain during access.
2200 bSortMatrix
= !DOMAINDECOMP(cr
);
2202 if (li
->ncc
> li
->ncc_alloc
)
2204 li
->ncc_alloc
= over_alloc_small(li
->ncc
);
2205 srenew(li
->blbnb
, li
->ncc_alloc
);
2208 #pragma omp parallel for num_threads(li->ntask) schedule(static)
2209 for (th
= 0; th
< li
->ntask
; th
++)
2215 li_task
= &li
->task
[th
];
2217 if (li
->ncg_triangle
> 0 &&
2218 li_task
->b1
- li_task
->b0
> li_task
->tri_alloc
)
2220 /* This is allocating too much, but it is difficult to improve */
2221 li_task
->tri_alloc
= over_alloc_dd(li_task
->b1
- li_task
->b0
);
2222 srenew(li_task
->triangle
, li_task
->tri_alloc
);
2223 srenew(li_task
->tri_bits
, li_task
->tri_alloc
);
2226 set_matrix_indices(li
, li_task
, &at2con
, bSortMatrix
);
2228 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2231 done_blocka(&at2con
);
2233 if (cr
->dd
== nullptr)
2235 /* Since the matrix is static, we should free some memory */
2236 li
->ncc_alloc
= li
->ncc
;
2237 srenew(li
->blbnb
, li
->ncc_alloc
);
2240 if (li
->ncc_alloc
> ncc_alloc_old
)
2242 srenew(li
->blmf
, li
->ncc_alloc
);
2243 srenew(li
->blmf1
, li
->ncc_alloc
);
2244 srenew(li
->tmpncc
, li
->ncc_alloc
);
2247 if (DOMAINDECOMP(cr
) && dd_constraints_nlocalatoms(cr
->dd
) != nullptr)
2251 nlocat_dd
= dd_constraints_nlocalatoms(cr
->dd
);
2253 /* Convert nlocat from local topology to LINCS constraint indexing */
2254 for (con
= 0; con
< ncon_tot
; con
++)
2256 li
->nlocat
[li
->con_index
[con
]] = nlocat_dd
[con
];
2261 li
->nlocat
= nullptr;
2266 fprintf(debug
, "Number of constraints is %d, padded %d, couplings %d\n",
2267 li
->nc_real
, li
->nc
, li
->ncc
);
2272 lincs_thread_setup(li
, md
.nr
);
2275 set_lincs_matrix(li
, md
.invmass
, md
.lambda
);
2278 //! Issues a warning when LINCS constraints cannot be satisfied.
2279 static void lincs_warning(FILE *fplog
,
2280 gmx_domdec_t
*dd
, const rvec
*x
, rvec
*xprime
, t_pbc
*pbc
,
2281 int ncons
, int *bla
, real
*bllen
, real wangle
,
2282 int maxwarn
, int *warncount
)
2286 real wfac
, d0
, d1
, cosine
;
2289 wfac
= std::cos(DEG2RAD
*wangle
);
2291 sprintf(buf
, "bonds that rotated more than %g degrees:\n"
2292 " atom 1 atom 2 angle previous, current, constraint length\n",
2294 fprintf(stderr
, "%s", buf
);
2297 fprintf(fplog
, "%s", buf
);
2300 for (b
= 0; b
< ncons
; b
++)
2306 pbc_dx_aiuc(pbc
, x
[i
], x
[j
], v0
);
2307 pbc_dx_aiuc(pbc
, xprime
[i
], xprime
[j
], v1
);
2311 rvec_sub(x
[i
], x
[j
], v0
);
2312 rvec_sub(xprime
[i
], xprime
[j
], v1
);
2316 cosine
= ::iprod(v0
, v1
)/(d0
*d1
);
2319 sprintf(buf
, " %6d %6d %5.1f %8.4f %8.4f %8.4f\n",
2320 ddglatnr(dd
, i
), ddglatnr(dd
, j
),
2321 RAD2DEG
*std::acos(cosine
), d0
, d1
, bllen
[b
]);
2322 fprintf(stderr
, "%s", buf
);
2325 fprintf(fplog
, "%s", buf
);
2327 if (!std::isfinite(d1
))
2329 gmx_fatal(FARGS
, "Bond length not finite.");
2335 if (*warncount
> maxwarn
)
2337 too_many_constraint_warnings(econtLINCS
, *warncount
);
2341 //! Determine how well the constraints have been satisfied.
2342 static void cconerr(const Lincs
*lincsd
,
2343 rvec
*x
, t_pbc
*pbc
,
2344 real
*ncons_loc
, real
*ssd
, real
*max
, int *imax
)
2346 const int *bla
, *nlocat
;
2349 int count
, im
, task
;
2352 bllen
= lincsd
->bllen
;
2353 nlocat
= lincsd
->nlocat
;
2359 for (task
= 0; task
< lincsd
->ntask
; task
++)
2363 for (b
= lincsd
->task
[task
].b0
; b
< lincsd
->task
[task
].b1
; b
++)
2370 pbc_dx_aiuc(pbc
, x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
2374 rvec_sub(x
[bla
[2*b
]], x
[bla
[2*b
+1]], dx
);
2377 len
= r2
*gmx::invsqrt(r2
);
2378 d
= std::abs(len
/bllen
[b
]-1);
2379 if (d
> ma
&& (nlocat
== nullptr || nlocat
[b
]))
2384 if (nlocat
== nullptr)
2391 ssd2
+= nlocat
[b
]*d
*d
;
2397 *ncons_loc
= (nlocat
? 0.5 : 1)*count
;
2398 *ssd
= (nlocat
? 0.5 : 1)*ssd2
;
2403 bool constrain_lincs(FILE *fplog
, bool bLog
, bool bEner
,
2404 const t_inputrec
&ir
,
2406 Lincs
*lincsd
, const t_mdatoms
&md
,
2407 const t_commrec
*cr
,
2408 const gmx_multisim_t
&ms
,
2409 const rvec
*x
, rvec
*xprime
, rvec
*min_proj
,
2410 matrix box
, t_pbc
*pbc
,
2411 real lambda
, real
*dvdlambda
,
2412 real invdt
, rvec
*v
,
2413 bool bCalcVir
, tensor vir_r_m_dr
,
2414 ConstraintVariable econq
,
2416 int maxwarn
, int *warncount
)
2419 char buf
[STRLEN
], buf2
[22], buf3
[STRLEN
];
2421 real ncons_loc
, p_ssd
, p_max
= 0;
2427 /* This boolean should be set by a flag passed to this routine.
2428 * We can also easily check if any constraint length is changed,
2429 * if not dH/dlambda=0 and we can also set the boolean to FALSE.
2431 bCalcDHDL
= (ir
.efep
!= efepNO
&& dvdlambda
!= nullptr);
2433 if (lincsd
->nc
== 0 && cr
->dd
== nullptr)
2437 lincsd
->rmsd_data
[0] = 0;
2438 lincsd
->rmsd_data
[1] = 0;
2444 if (econq
== ConstraintVariable::Positions
)
2446 /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
2447 * also with efep!=fepNO.
2449 if (ir
.efep
!= efepNO
)
2451 if (md
.nMassPerturbed
&& lincsd
->matlam
!= md
.lambda
)
2453 set_lincs_matrix(lincsd
, md
.invmass
, md
.lambda
);
2456 for (i
= 0; i
< lincsd
->nc
; i
++)
2458 lincsd
->bllen
[i
] = lincsd
->bllen0
[i
] + lambda
*lincsd
->ddist
[i
];
2462 if (lincsd
->ncg_flex
)
2464 /* Set the flexible constraint lengths to the old lengths */
2467 for (i
= 0; i
< lincsd
->nc
; i
++)
2469 if (lincsd
->bllen
[i
] == 0)
2471 pbc_dx_aiuc(pbc
, x
[lincsd
->bla
[2*i
]], x
[lincsd
->bla
[2*i
+1]], dx
);
2472 lincsd
->bllen
[i
] = norm(dx
);
2478 for (i
= 0; i
< lincsd
->nc
; i
++)
2480 if (lincsd
->bllen
[i
] == 0)
2483 std::sqrt(distance2(x
[lincsd
->bla
[2*i
]],
2484 x
[lincsd
->bla
[2*i
+1]]));
2492 cconerr(lincsd
, xprime
, pbc
,
2493 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2496 /* This bWarn var can be updated by multiple threads
2497 * at the same time. But as we only need to detect
2498 * if a warning occurred or not, this is not an issue.
2502 /* The OpenMP parallel region of constrain_lincs for coords */
2503 #pragma omp parallel num_threads(lincsd->ntask)
2507 int th
= gmx_omp_get_thread_num();
2509 clear_mat(lincsd
->task
[th
].vir_r_m_dr
);
2511 do_lincs(x
, xprime
, box
, pbc
, lincsd
, th
,
2514 ir
.LincsWarnAngle
, &bWarn
,
2516 th
== 0 ? vir_r_m_dr
: lincsd
->task
[th
].vir_r_m_dr
);
2518 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2521 if (bLog
&& fplog
&& lincsd
->nc
> 0)
2523 fprintf(fplog
, " Rel. Constraint Deviation: RMS MAX between atoms\n");
2524 fprintf(fplog
, " Before LINCS %.6f %.6f %6d %6d\n",
2525 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2526 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2527 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2531 cconerr(lincsd
, xprime
, pbc
,
2532 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2533 lincsd
->rmsd_data
[0] = ncons_loc
;
2534 lincsd
->rmsd_data
[1] = p_ssd
;
2538 lincsd
->rmsd_data
[0] = 0;
2539 lincsd
->rmsd_data
[1] = 0;
2540 lincsd
->rmsd_data
[2] = 0;
2542 if (bLog
&& fplog
&& lincsd
->nc
> 0)
2545 " After LINCS %.6f %.6f %6d %6d\n\n",
2546 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2547 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2548 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2553 if (maxwarn
< INT_MAX
)
2555 cconerr(lincsd
, xprime
, pbc
,
2556 &ncons_loc
, &p_ssd
, &p_max
, &p_imax
);
2557 if (isMultiSim(&ms
))
2559 sprintf(buf3
, " in simulation %d", ms
.sim
);
2565 sprintf(buf
, "\nStep %s, time %g (ps) LINCS WARNING%s\n"
2566 "relative constraint deviation after LINCS:\n"
2567 "rms %.6f, max %.6f (between atoms %d and %d)\n",
2568 gmx_step_str(step
, buf2
), ir
.init_t
+step
*ir
.delta_t
,
2570 std::sqrt(p_ssd
/ncons_loc
), p_max
,
2571 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
]),
2572 ddglatnr(cr
->dd
, lincsd
->bla
[2*p_imax
+1]));
2575 fprintf(fplog
, "%s", buf
);
2577 fprintf(stderr
, "%s", buf
);
2578 lincs_warning(fplog
, cr
->dd
, x
, xprime
, pbc
,
2579 lincsd
->nc
, lincsd
->bla
, lincsd
->bllen
,
2580 ir
.LincsWarnAngle
, maxwarn
, warncount
);
2582 bOK
= (p_max
< 0.5);
2585 if (lincsd
->ncg_flex
)
2587 for (i
= 0; (i
< lincsd
->nc
); i
++)
2589 if (lincsd
->bllen0
[i
] == 0 && lincsd
->ddist
[i
] == 0)
2591 lincsd
->bllen
[i
] = 0;
2598 /* The OpenMP parallel region of constrain_lincs for derivatives */
2599 #pragma omp parallel num_threads(lincsd->ntask)
2603 int th
= gmx_omp_get_thread_num();
2605 do_lincsp(x
, xprime
, min_proj
, pbc
, lincsd
, th
,
2606 md
.invmass
, econq
, bCalcDHDL
,
2607 bCalcVir
, th
== 0 ? vir_r_m_dr
: lincsd
->task
[th
].vir_r_m_dr
);
2609 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
2615 /* Reduce the dH/dlambda contributions over the threads */
2620 for (th
= 0; th
< lincsd
->ntask
; th
++)
2622 dhdlambda
+= lincsd
->task
[th
].dhdlambda
;
2624 if (econq
== ConstraintVariable::Positions
)
2626 /* dhdlambda contains dH/dlambda*dt^2, correct for this */
2627 /* TODO This should probably use invdt, so that sd integrator scaling works properly */
2628 dhdlambda
/= ir
.delta_t
*ir
.delta_t
;
2630 *dvdlambda
+= dhdlambda
;
2633 if (bCalcVir
&& lincsd
->ntask
> 1)
2635 for (i
= 1; i
< lincsd
->ntask
; i
++)
2637 m_add(vir_r_m_dr
, lincsd
->task
[i
].vir_r_m_dr
, vir_r_m_dr
);
2641 /* count assuming nit=1 */
2642 inc_nrnb(nrnb
, eNR_LINCS
, lincsd
->nc_real
);
2643 inc_nrnb(nrnb
, eNR_LINCSMAT
, (2+lincsd
->nOrder
)*lincsd
->ncc
);
2644 if (lincsd
->ntriangle
> 0)
2646 inc_nrnb(nrnb
, eNR_LINCSMAT
, lincsd
->nOrder
*lincsd
->ncc_triangle
);
2650 inc_nrnb(nrnb
, eNR_CONSTR_V
, lincsd
->nc_real
*2);
2654 inc_nrnb(nrnb
, eNR_CONSTR_VIR
, lincsd
->nc_real
);