gromacs.git: src/gromacs/nbnxm/grid.cpp
blob 54b10eaf284dfee5c33d552f759b5b40856dbaad (commit: "Enable TPI with the Verlet cut-off scheme")
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief
 * Implements the Grid class.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_nbnxm
 */
#include "gmxpre.h"

#include "grid.h"

#include <cmath>
#include <cstring>

#include <algorithm>

#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/updategroupscog.h"
#include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/vector_operations.h"

#include "boundingboxes.h"
#include "gridsetdata.h"
#include "pairlistparams.h"
namespace Nbnxm
{

Grid::Geometry::Geometry(const PairlistType pairlistType) :
    isSimple(pairlistType != PairlistType::HierarchicalNxN),
    numAtomsICluster(IClusterSizePerListType[pairlistType]),
    numAtomsJCluster(JClusterSizePerListType[pairlistType]),
    numAtomsPerCell((isSimple ? 1 : c_gpuNumClusterPerCell)*numAtomsICluster),
    numAtomsICluster2Log(get_2log(numAtomsICluster))
{
}

Grid::Grid(const PairlistType  pairlistType,
           const bool         &haveFep) :
    geometry_(pairlistType),
    haveFep_(haveFep)
{
}
/*! \brief Returns the atom density (> 0) of a rectangular grid */
static real gridAtomDensity(int        numAtoms,
                            const rvec lowerCorner,
                            const rvec upperCorner)
{
    rvec size;

    if (numAtoms == 0)
    {
        /* To avoid zero density we use a minimum of 1 atom */
        numAtoms = 1;
    }

    rvec_sub(upperCorner, lowerCorner, size);

    return static_cast<real>(numAtoms)/(size[XX]*size[YY]*size[ZZ]);
}
void Grid::setDimensions(const int          ddZone,
                         const int          numAtoms,
                         gmx::RVec          lowerCorner,
                         gmx::RVec          upperCorner,
                         real               atomDensity,
                         const real         maxAtomGroupRadius,
                         const bool         haveFep,
                         gmx::PinningPolicy pinningPolicy)
{
    /* We allow passing lowerCorner=upperCorner, in which case we need to
     * create a finite sized bounding box to avoid division by zero.
     * We use a minimum size such that the volume fits in float with some
     * margin for computing and using the atom number density.
     */
    constexpr real c_minimumGridSize = 1e-10;
    for (int d = 0; d < DIM; d++)
    {
        GMX_ASSERT(upperCorner[d] >= lowerCorner[d], "Upper corner should be larger than the lower corner");
        if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize)
        {
            /* Ensure we apply a correction to the bounding box */
            real correction = std::max(std::abs(lowerCorner[d])*GMX_REAL_EPS,
                                       0.5_real*c_minimumGridSize);
            lowerCorner[d] -= correction;
            upperCorner[d] += correction;
        }
    }

    /* For the home zone we compute the density when not set (=-1) or when =0 */
    if (ddZone == 0 && atomDensity <= 0)
    {
        atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner);
    }

    dimensions_.atomDensity        = atomDensity;
    dimensions_.maxAtomGroupRadius = maxAtomGroupRadius;
    rvec size;
    rvec_sub(upperCorner, lowerCorner, size);

    if (numAtoms > geometry_.numAtomsPerCell)
    {
        GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");

        /* target cell length */
        real tlen_x;
        real tlen_y;
        if (geometry_.isSimple)
        {
            /* To minimize the zero interactions, we should make
             * the largest of the i/j cell cubic.
             */
            int numAtomsInCell = std::max(geometry_.numAtomsICluster,
                                          geometry_.numAtomsJCluster);

            /* Approximately cubic cells */
            real tlen = std::cbrt(numAtomsInCell/atomDensity);
            tlen_x    = tlen;
            tlen_y    = tlen;
        }
        else
        {
            /* Approximately cubic sub cells */
            real tlen = std::cbrt(geometry_.numAtomsICluster/atomDensity);
            tlen_x    = tlen*c_gpuNumClusterPerCellX;
            tlen_y    = tlen*c_gpuNumClusterPerCellY;
        }
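        /* Rough worked example (illustrative numbers, not from this file):
         * at a typical condensed-phase density of ~100 atoms/nm^3 with
         * 4-atom clusters, tlen = cbrt(4/100) is about 0.34 nm, so the
         * x,y cell size ends up of the same order as common cut-offs.
         */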
        /* We round ncx and ncy down, because we get fewer cell pairs
         * in the pairlist when the fixed cell dimensions (x,y) are
         * larger than the variable one (z) than the other way around.
         */
        dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
        dimensions_.numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));
    }
    else
    {
        dimensions_.numCells[XX] = 1;
        dimensions_.numCells[YY] = 1;
    }

    for (int d = 0; d < DIM - 1; d++)
    {
        dimensions_.cellSize[d]    = size[d]/dimensions_.numCells[d];
        dimensions_.invCellSize[d] = 1/dimensions_.cellSize[d];
    }
    if (ddZone > 0)
    {
        /* This is a non-home zone, add an extra row of cells
         * for particles communicated for bonded interactions.
         * These can be beyond the cut-off. It doesn't matter where
         * they end up on the grid, but for performance it's better
         * if they don't end up in cells that can be within cut-off range.
         */
        dimensions_.numCells[XX]++;
        dimensions_.numCells[YY]++;
    }

    /* We need one additional cell entry for particles moved by DD */
    cxy_na_.resize(numColumns() + 1);
    cxy_ind_.resize(numColumns() + 2);
    changePinningPolicy(&cxy_na_, pinningPolicy);
    changePinningPolicy(&cxy_ind_, pinningPolicy);
    /* Worst case scenario of 1 atom in each last cell */
    int maxNumCells;
    if (geometry_.numAtomsJCluster <= geometry_.numAtomsICluster)
    {
        maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns();
    }
    else
    {
        maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns()*geometry_.numAtomsJCluster/geometry_.numAtomsICluster;
    }

    if (!geometry_.isSimple)
    {
        numClusters_.resize(maxNumCells);
    }
    bbcz_.resize(maxNumCells);

    /* This resize also zeros the contents, which avoids possible
     * floating-point exceptions in SIMD with the unused bb elements.
     */
    if (geometry_.isSimple)
    {
        bb_.resize(maxNumCells);
    }
    else
    {
#if NBNXN_BBXXXX
        pbb_.resize(packedBoundingBoxesIndex(maxNumCells*c_gpuNumClusterPerCell));
#else
        bb_.resize(maxNumCells*c_gpuNumClusterPerCell);
#endif
    }

    if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
    {
        bbj_ = bb_;
    }
    else
    {
        GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes");

        bbjStorage_.resize(maxNumCells*geometry_.numAtomsICluster/geometry_.numAtomsJCluster);
        bbj_ = bbjStorage_;
    }

    flags_.resize(maxNumCells);
    if (haveFep)
    {
        fep_.resize(maxNumCells*geometry_.numAtomsPerCell/geometry_.numAtomsICluster);
    }

    copy_rvec(lowerCorner, dimensions_.lowerCorner);
    copy_rvec(upperCorner, dimensions_.upperCorner);
    copy_rvec(size, dimensions_.gridSize);
}
/* We need to sort particles in grid columns on z-coordinate.
 * As particles are very often distributed homogeneously, we use a sorting
 * algorithm similar to pigeonhole sort. We multiply the z-coordinate
 * by a factor, cast to an int and try to store in that hole. If the hole
 * is full, we move this or another particle. A second pass is needed to make
 * contiguous elements. c_sortGridRatio is the ratio of holes to particles.
 * 4 is the optimal value for a homogeneous particle distribution and allows
 * for an O(#particles) sort up to distributions where all particles are
 * concentrated in 1/4 of the space. No NlogN fallback is implemented,
 * as it can be expensive to detect inhomogeneous particle distributions.
 */

/*! \brief Ratio of grid cells to atoms */
static constexpr int c_sortGridRatio         = 4;
/*! \brief Maximum ratio of holes used, in the worst case all particles
 * end up in the last hole and we need num. atoms extra holes at the end.
 */
static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1;
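/* Example of the hole mapping used below: sorting n_per_h = 8 atoms over a
 * 2 nm column gives 8*c_sortGridRatio = 32 holes of 1/16 nm each; an atom at
 * z - h0 = 0.5 nm is first tried in hole zi = (int)(0.5*16) = 8.
 */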
/*! \brief Sorts particle index a on coordinates x along dim.
 *
 * Backwards tells if we want decreasing instead of increasing coordinates.
 * h0 is the minimum of the coordinate range.
 * invh is the 1/length of the sorting range.
 * n_per_h (>=n) is the expected average number of particles per 1/invh.
 * sort is the sorting work array.
 * sort should have a size of at least n_per_h*c_sortGridRatio + n,
 * or easier, allocate at least n*c_sortGridMaxSizeFactor elements.
 */
static void sort_atoms(int dim, gmx_bool Backwards,
                       int gmx_unused dd_zone,
                       bool gmx_unused relevantAtomsAreWithinGridBounds,
                       int *a, int n,
                       gmx::ArrayRef<const gmx::RVec> x,
                       real h0, real invh, int n_per_h,
                       gmx::ArrayRef<int> sort)
{
    if (n <= 1)
    {
        /* Nothing to do */
        return;
    }

    GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");

    /* Transform the inverse range height into the inverse hole height */
    invh *= n_per_h*c_sortGridRatio;

    /* Set nsort to the maximum possible number of holes used.
     * In the worst case all n elements end up in the last bin.
     */
    int nsort = n_per_h*c_sortGridRatio + n;

    /* Determine the index range used, so we can limit it for the second pass */
    int zi_min = INT_MAX;
    int zi_max = -1;

    /* Sort the particles using a simple index sort */
    for (int i = 0; i < n; i++)
    {
        /* The cast takes care of floating-point rounding effects below zero.
         * This code assumes particles are less than 1/c_sortGridRatio
         * times the box height out of the box.
         */
        int zi = static_cast<int>((x[a[i]][dim] - h0)*invh);

#ifndef NDEBUG
        /* As we can have rounding effects, we use > instead of >= here */
        if (relevantAtomsAreWithinGridBounds &&
            (zi < 0 || (dd_zone == 0 && zi > n_per_h*c_sortGridRatio)))
        {
            gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
                      a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi,
                      n_per_h, c_sortGridRatio);
        }
#endif
        if (zi < 0)
        {
            zi = 0;
        }

        /* In a non-local domain, particles communicated for bonded interactions
         * can be far beyond the grid size, which is set by the non-bonded
         * cut-off distance. We sort such particles into the last cell.
         */
        if (zi > n_per_h*c_sortGridRatio)
        {
            zi = n_per_h*c_sortGridRatio;
        }

        /* Ideally this particle should go in sort cell zi,
         * but that might already be in use,
         * in that case find the first empty cell higher up
         */
        if (sort[zi] < 0)
        {
            sort[zi] = a[i];
            zi_min   = std::min(zi_min, zi);
            zi_max   = std::max(zi_max, zi);
        }
        else
        {
            /* We have multiple atoms in the same sorting slot.
             * Sort on real z for minimal bounding box size.
             * There is an extra check for identical z to ensure
             * well-defined output order, independent of input order,
             * to ensure binary reproducibility after restarts.
             */
            while (sort[zi] >= 0 && ( x[a[i]][dim] >  x[sort[zi]][dim] ||
                                      (x[a[i]][dim] == x[sort[zi]][dim] &&
                                       a[i] > sort[zi])))
            {
                zi++;
            }

            if (sort[zi] >= 0)
            {
                /* Shift all elements by one slot until we find an empty slot */
                int cp  = sort[zi];
                int zim = zi + 1;
                while (sort[zim] >= 0)
                {
                    int tmp   = sort[zim];
                    sort[zim] = cp;
                    cp        = tmp;
                    zim++;
                }
                sort[zim] = cp;
                zi_max    = std::max(zi_max, zim);
            }
            sort[zi] = a[i];
            zi_max   = std::max(zi_max, zi);
        }
    }

    int c = 0;
    if (!Backwards)
    {
        for (int zi = 0; zi < nsort; zi++)
        {
            if (sort[zi] >= 0)
            {
                a[c++]   = sort[zi];
                sort[zi] = -1;
            }
        }
    }
    else
    {
        for (int zi = zi_max; zi >= zi_min; zi--)
        {
            if (sort[zi] >= 0)
            {
                a[c++]   = sort[zi];
                sort[zi] = -1;
            }
        }
    }

    if (c < n)
    {
        gmx_incons("Lost particles while sorting");
    }
}
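/* Bounding boxes are stored in single precision even in double-precision
 * builds. The helpers below round lower corners down and upper corners up by
 * roughly one float ulp, so the float boxes always enclose the
 * double-precision coordinates and distance-based culling stays conservative.
 */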
#if GMX_DOUBLE
//! Returns double up to one least significant float bit smaller than x
static double R2F_D(const float x)
{
    return static_cast<float>(x >= 0 ? (1 - GMX_FLOAT_EPS)*x : (1 + GMX_FLOAT_EPS)*x);
}
//! Returns double up to one least significant float bit larger than x
static double R2F_U(const float x)
{
    return static_cast<float>(x >= 0 ? (1 + GMX_FLOAT_EPS)*x : (1 - GMX_FLOAT_EPS)*x);
}
#else
//! Returns x
static float R2F_D(const float x)
{
    return x;
}
//! Returns x
static float R2F_U(const float x)
{
    return x;
}
#endif
//! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0
static void calc_bounding_box(int na, int stride, const real *x,
                              BoundingBox *bb)
{
    int  i;
    real xl, xh, yl, yh, zl, zh;

    i  = 0;
    xl = x[i+XX];
    xh = x[i+XX];
    yl = x[i+YY];
    yh = x[i+YY];
    zl = x[i+ZZ];
    zh = x[i+ZZ];
    i += stride;
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);
        i += stride;
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}
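/* For the packed coordinate formats handled below, coordinates are stored in
 * blocks of c_packX4 (or c_packX8) atoms per dimension, i.e. x0..x3, y0..y3,
 * z0..z3, ..., so atom j's x within a block is found at x[j + XX*c_packX4].
 */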
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
static void calc_bounding_box_x_x4(int na, const real *x,
                                   BoundingBox *bb)
{
    real xl, xh, yl, yh, zl, zh;

    xl = x[XX*c_packX4];
    xh = x[XX*c_packX4];
    yl = x[YY*c_packX4];
    yh = x[YY*c_packX4];
    zl = x[ZZ*c_packX4];
    zh = x[ZZ*c_packX4];
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[j+XX*c_packX4]);
        xh = std::max(xh, x[j+XX*c_packX4]);
        yl = std::min(yl, x[j+YY*c_packX4]);
        yh = std::max(yh, x[j+YY*c_packX4]);
        zl = std::min(zl, x[j+ZZ*c_packX4]);
        zh = std::max(zh, x[j+ZZ*c_packX4]);
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}
/*! \brief Computes the bounding box for na coordinates, bb order xyz0 */
static void calc_bounding_box_x_x8(int na, const real *x,
                                   BoundingBox *bb)
{
    real xl, xh, yl, yh, zl, zh;

    xl = x[XX*c_packX8];
    xh = x[XX*c_packX8];
    yl = x[YY*c_packX8];
    yh = x[YY*c_packX8];
    zl = x[ZZ*c_packX8];
    zh = x[ZZ*c_packX8];
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[j+XX*c_packX8]);
        xh = std::max(xh, x[j+XX*c_packX8]);
        yl = std::min(yl, x[j+YY*c_packX8]);
        yh = std::max(yh, x[j+YY*c_packX8]);
        zl = std::min(zl, x[j+ZZ*c_packX8]);
        zh = std::max(zh, x[j+ZZ*c_packX8]);
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x,
                                                      BoundingBox *bb,
                                                      BoundingBox *bbj)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    calc_bounding_box_x_x4(std::min(na, 2), x, bbj);

    if (na > 2)
    {
        calc_bounding_box_x_x4(std::min(na-2, 2), x+(c_packX4>>1), bbj+1);
    }
    else
    {
        /* Set the "empty" bounding box to the same as the first one,
         * so we don't need to treat special cases in the rest of the code.
         */
#if NBNXN_SEARCH_BB_SIMD4
        store4(bbj[1].lower.ptr(), load4(bbj[0].lower.ptr()));
        store4(bbj[1].upper.ptr(), load4(bbj[0].upper.ptr()));
#else
        bbj[1] = bbj[0];
#endif
    }

#if NBNXN_SEARCH_BB_SIMD4
    store4(bb->lower.ptr(),
           min(load4(bbj[0].lower.ptr()), load4(bbj[1].lower.ptr())));
    store4(bb->upper.ptr(),
           max(load4(bbj[0].upper.ptr()), load4(bbj[1].upper.ptr())));
#else
    {
        bb->lower = BoundingBox::Corner::min(bbj[0].lower,
                                             bbj[1].lower);
        bb->upper = BoundingBox::Corner::max(bbj[0].upper,
                                             bbj[1].upper);
    }
#endif
}
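/* With NBNXN_BBXXXX the bounding boxes of a group of clusters are stored
 * transposed for SIMD4 access: all lower-x values of the group first, then
 * lower-y, lower-z, upper-x, upper-y and upper-z, each block being
 * c_packedBoundingBoxesDimSize entries wide; this matches the 0..5 strides
 * used in the functions below.
 */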
#if NBNXN_BBXXXX

/*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
{
    int  i;
    real xl, xh, yl, yh, zl, zh;

    i  = 0;
    xl = x[i+XX];
    xh = x[i+XX];
    yl = x[i+YY];
    yh = x[i+YY];
    zl = x[i+ZZ];
    zh = x[i+ZZ];
    i += stride;
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);
        i += stride;
    }
    /* Note: possible double to float conversion here */
    bb[0*c_packedBoundingBoxesDimSize] = R2F_D(xl);
    bb[1*c_packedBoundingBoxesDimSize] = R2F_D(yl);
    bb[2*c_packedBoundingBoxesDimSize] = R2F_D(zl);
    bb[3*c_packedBoundingBoxesDimSize] = R2F_U(xh);
    bb[4*c_packedBoundingBoxesDimSize] = R2F_U(yh);
    bb[5*c_packedBoundingBoxesDimSize] = R2F_U(zh);
}

#endif /* NBNXN_BBXXXX */
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */
static void calc_bounding_box_simd4(int na, const float *x,
                                    BoundingBox *bb)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    static_assert(sizeof(BoundingBox::Corner) == GMX_SIMD4_WIDTH*sizeof(float),
                  "The Corner struct should hold exactly 4 floats");

    Simd4Float bb_0_S = load4(x);
    Simd4Float bb_1_S = bb_0_S;

    for (int i = 1; i < na; i++)
    {
        Simd4Float x_S = load4(x + i*GMX_SIMD4_WIDTH);
        bb_0_S         = min(bb_0_S, x_S);
        bb_1_S         = max(bb_1_S, x_S);
    }

    store4(bb->lower.ptr(), bb_0_S);
    store4(bb->upper.ptr(), bb_1_S);
}

#if NBNXN_BBXXXX

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx_simd4(int na, const float *x,
                                         BoundingBox *bb_work_aligned,
                                         real *bb)
{
    calc_bounding_box_simd4(na, x, bb_work_aligned);

    bb[0*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.x;
    bb[1*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.y;
    bb[2*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.z;
    bb[3*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.x;
    bb[4*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.y;
    bb[5*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.z;
}

#endif /* NBNXN_BBXXXX */

#endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
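/* With the nbatX8 layout the j-cluster holds twice as many atoms as the
 * i-cluster, so a j-cluster bounding box is the union of two consecutive
 * i-cluster boxes; the routine below builds that bbj list.
 */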
/*! \brief Combines pairs of consecutive bounding boxes */
static void combine_bounding_box_pairs(const Grid                       &grid,
                                       gmx::ArrayRef<const BoundingBox>  bb,
                                       gmx::ArrayRef<BoundingBox>        bbj)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    for (int i = 0; i < grid.numColumns(); i++)
    {
        /* Starting bb in a column is expected to be 2-aligned */
        const int sc2 = grid.firstCellInColumn(i) >> 1;
        /* For odd numbers skip the last bb here */
        const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1);
        int       c2;
        for (c2 = sc2; c2 < sc2 + nc2; c2++)
        {
#if NBNXN_SEARCH_BB_SIMD4
            Simd4Float min_S, max_S;

            min_S = min(load4(bb[c2*2+0].lower.ptr()),
                        load4(bb[c2*2+1].lower.ptr()));
            max_S = max(load4(bb[c2*2+0].upper.ptr()),
                        load4(bb[c2*2+1].upper.ptr()));
            store4(bbj[c2].lower.ptr(), min_S);
            store4(bbj[c2].upper.ptr(), max_S);
#else
            bbj[c2].lower = BoundingBox::Corner::min(bb[c2*2 + 0].lower,
                                                     bb[c2*2 + 1].lower);
            bbj[c2].upper = BoundingBox::Corner::max(bb[c2*2 + 0].upper,
                                                     bb[c2*2 + 1].upper);
#endif
        }
        if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)
        {
            /* The bb count in this column is odd: duplicate the last bb */
            bbj[c2].lower = bb[c2*2].lower;
            bbj[c2].upper = bb[c2*2].upper;
        }
    }
}
/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_simple(FILE       *fp,
                                 const Grid &grid)
{
    dvec ba = { 0 };
    for (int c = 0; c < grid.numCells(); c++)
    {
        const BoundingBox &bb = grid.iBoundingBoxes()[c];
        ba[XX] += bb.upper.x - bb.lower.x;
        ba[YY] += bb.upper.y - bb.lower.y;
        ba[ZZ] += bb.upper.z - bb.lower.z;
    }
    dsvmul(1.0/grid.numCells(), ba, ba);

    const Grid::Dimensions &dims = grid.dimensions();
    real avgCellSizeZ = (dims.atomDensity > 0 ? grid.geometry().numAtomsICluster/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
            dims.cellSize[XX],
            dims.cellSize[YY],
            avgCellSizeZ,
            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*dims.invCellSize[XX],
            ba[YY]*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
}
/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_supersub(FILE       *fp,
                                   const Grid &grid)
{
    int  ns;
    dvec ba;

    clear_dvec(ba);
    ns = 0;
    for (int c = 0; c < grid.numCells(); c++)
    {
#if NBNXN_BBXXXX
        for (int s = 0; s < grid.numClustersPerCell()[c]; s += c_packedBoundingBoxesDimSize)
        {
            int  cs_w          = (c*c_gpuNumClusterPerCell + s)/c_packedBoundingBoxesDimSize;
            auto boundingBoxes = grid.packedBoundingBoxes().subArray(cs_w*c_packedBoundingBoxesSize, c_packedBoundingBoxesSize);
            for (int i = 0; i < c_packedBoundingBoxesDimSize; i++)
            {
                for (int d = 0; d < DIM; d++)
                {
                    ba[d] +=
                        boundingBoxes[(DIM + d)*c_packedBoundingBoxesDimSize + i] -
                        boundingBoxes[(0 + d)*c_packedBoundingBoxesDimSize + i];
                }
            }
        }
#else
        for (int s = 0; s < grid.numClustersPerCell()[c]; s++)
        {
            const BoundingBox &bb = grid.iBoundingBoxes()[c*c_gpuNumClusterPerCell + s];
            ba[XX] += bb.upper.x - bb.lower.x;
            ba[YY] += bb.upper.y - bb.lower.y;
            ba[ZZ] += bb.upper.z - bb.lower.z;
        }
#endif
        ns += grid.numClustersPerCell()[c];
    }
    dsvmul(1.0/ns, ba, ba);

    const Grid::Dimensions &dims = grid.dimensions();
    const real avgClusterSizeZ =
        (dims.atomDensity > 0 ? grid.geometry().numAtomsPerCell/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
            dims.cellSize[XX]/c_gpuNumClusterPerCellX,
            dims.cellSize[YY]/c_gpuNumClusterPerCellY,
            avgClusterSizeZ,
            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*c_gpuNumClusterPerCellX*dims.invCellSize[XX],
            ba[YY]*c_gpuNumClusterPerCellY*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
}
/*! \brief Set non-bonded interaction flags for the current cluster.
 *
 * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
 */
static void sort_cluster_on_flag(int                 numAtomsInCluster,
                                 int                 atomStart,
                                 int                 atomEnd,
                                 const int          *atinfo,
                                 gmx::ArrayRef<int>  order,
                                 int                *flags)
{
    constexpr int c_maxNumAtomsInCluster = 8;
    int           sort1[c_maxNumAtomsInCluster];
    int           sort2[c_maxNumAtomsInCluster];

    GMX_ASSERT(numAtomsInCluster <= c_maxNumAtomsInCluster, "Need to increase c_maxNumAtomsInCluster to support larger clusters");

    *flags = 0;

    int subc = 0;
    for (int s = atomStart; s < atomEnd; s += numAtomsInCluster)
    {
        /* Make lists for this (sub-)cell on atoms with and without LJ */
        int      n1       = 0;
        int      n2       = 0;
        gmx_bool haveQ    = FALSE;
        int      a_lj_max = -1;
        for (int a = s; a < std::min(s + numAtomsInCluster, atomEnd); a++)
        {
            haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]);

            if (GET_CGINFO_HAS_VDW(atinfo[order[a]]))
            {
                sort1[n1++] = order[a];
                a_lj_max    = a;
            }
            else
            {
                sort2[n2++] = order[a];
            }
        }

        /* If we don't have atoms with LJ, there's nothing to sort */
        if (n1 > 0)
        {
            *flags |= NBNXN_CI_DO_LJ(subc);

            if (2*n1 <= numAtomsInCluster)
            {
                /* Only sort when strictly necessary. Ordering particles
                 * can lead to less accurate summation due to rounding,
                 * both for LJ and Coulomb interactions.
                 */
                if (2*(a_lj_max - s) >= numAtomsInCluster)
                {
                    for (int i = 0; i < n1; i++)
                    {
                        order[atomStart + i]      = sort1[i];
                    }
                    for (int j = 0; j < n2; j++)
                    {
                        order[atomStart + n1 + j] = sort2[j];
                    }
                }

                *flags |= NBNXN_CI_HALF_LJ(subc);
            }
        }
        if (haveQ)
        {
            *flags |= NBNXN_CI_DO_COUL(subc);
        }
        subc++;
    }
}
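/* The per-cluster flags set above are consumed by the pair-list and kernel
 * code: NBNXN_CI_DO_LJ/NBNXN_CI_DO_COUL mark whether any atom in the cluster
 * has LJ/charge parameters, and NBNXN_CI_HALF_LJ marks that only the first
 * half of the atoms have LJ, so kernels can skip LJ work for the second half.
 */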
/*! \brief Fill a pair search cell with atoms.
 *
 * Potentially sorts atoms and sets the interaction flags.
 */
void Grid::fillCell(GridSetData                    *gridSetData,
                    nbnxn_atomdata_t               *nbat,
                    int                             atomStart,
                    int                             atomEnd,
                    const int                      *atinfo,
                    gmx::ArrayRef<const gmx::RVec>  x,
                    BoundingBox gmx_unused         *bb_work_aligned)
{
    const int numAtoms = atomEnd - atomStart;

    const gmx::ArrayRef<int> &cells       = gridSetData->cells;
    const gmx::ArrayRef<int> &atomIndices = gridSetData->atomIndices;

    if (geometry_.isSimple)
    {
        /* Note that non-local grids are already sorted.
         * Then sort_cluster_on_flag will only set the flags and the sorting
         * will not affect the atom order.
         */
        sort_cluster_on_flag(geometry_.numAtomsICluster, atomStart, atomEnd, atinfo, atomIndices,
                             flags_.data() + atomToCluster(atomStart) - cellOffset_);
    }

    if (haveFep_)
    {
        /* Set the fep flag for perturbed atoms in this (sub-)cell */

        /* The grid-local cluster/(sub-)cell index */
        int cell = atomToCluster(atomStart) - cellOffset_*(geometry_.isSimple ? 1 : c_gpuNumClusterPerCell);
        fep_[cell] = 0;
        for (int at = atomStart; at < atomEnd; at++)
        {
            if (atomIndices[at] >= 0 && GET_CGINFO_FEP(atinfo[atomIndices[at]]))
            {
                fep_[cell] |= (1 << (at - atomStart));
            }
        }
    }

    /* Now we have sorted the atoms, set the cell indices */
    for (int at = atomStart; at < atomEnd; at++)
    {
        cells[atomIndices[at]] = at;
    }

    copy_rvec_to_nbat_real(atomIndices.data() + atomStart, numAtoms, geometry_.numAtomsICluster,
                           as_rvec_array(x.data()),
                           nbat->XFormat, nbat->x().data(), atomStart);

    if (nbat->XFormat == nbatX4)
    {
        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
        if (2*geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
        {
            calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
                                          bbj_.data() + offset*2);
        }
        else
#endif
        {
            calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
        }
    }
    else if (nbat->XFormat == nbatX8)
    {
        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

        calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
    }
#if NBNXN_BBXXXX
    else if (!geometry_.isSimple)
    {
        /* Store the bounding boxes in a format convenient
         * for SIMD4 calculations: xxxxyyyyzzzz...
         */
        const int clusterIndex = ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log);
        float    *pbb_ptr      =
            pbb_.data() +
            packedBoundingBoxesIndex(clusterIndex) +
            (clusterIndex & (c_packedBoundingBoxesDimSize - 1));

#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
        if (nbat->XFormat == nbatXYZQ)
        {
            GMX_ASSERT(bb_work_aligned != nullptr, "Must have valid aligned work structure");
            calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
                                         bb_work_aligned, pbb_ptr);
        }
        else
#endif
        {
            calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
                                   pbb_ptr);
        }
        if (gmx_debug_at)
        {
            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    pbb_ptr[0*c_packedBoundingBoxesDimSize], pbb_ptr[3*c_packedBoundingBoxesDimSize],
                    pbb_ptr[1*c_packedBoundingBoxesDimSize], pbb_ptr[4*c_packedBoundingBoxesDimSize],
                    pbb_ptr[2*c_packedBoundingBoxesDimSize], pbb_ptr[5*c_packedBoundingBoxesDimSize]);
        }
    }
#endif
    else
    {
        /* Store the bounding boxes as xyz.xyz. */
        BoundingBox *bb_ptr = bb_.data() + atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);

        calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
                          bb_ptr);

        if (gmx_debug_at)
        {
            int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    bb_[bbo].lower.x,
                    bb_[bbo].lower.y,
                    bb_[bbo].lower.z,
                    bb_[bbo].upper.x,
                    bb_[bbo].upper.y,
                    bb_[bbo].upper.z);
        }
    }
}
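/* Spatially sort the atoms within one grid column, CPU (simple cell) layout:
 * each cell holds a single i-cluster of atoms, sorted on z within the column.
 */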
void Grid::sortColumnsCpuGeometry(GridSetData *gridSetData,
                                  int dd_zone,
                                  int atomStart, int atomEnd,
                                  const int *atinfo,
                                  gmx::ArrayRef<const gmx::RVec> x,
                                  nbnxn_atomdata_t *nbat,
                                  int cxy_start, int cxy_end,
                                  gmx::ArrayRef<int> sort_work)
{
    if (debug)
    {
        fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
                cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);
    }

    const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);

    const int  numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Sort the atoms within each x,y column in 3 dimensions */
    for (int cxy = cxy_start; cxy < cxy_end; cxy++)
    {
        const int numAtoms   = numAtomsInColumn(cxy);
        const int numCellsZ  = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
        const int atomOffset = firstAtomInColumn(cxy);

        /* Sort the atoms within each x,y column on z coordinate */
        sort_atoms(ZZ, FALSE, dd_zone,
                   relevantAtomsAreWithinGridBounds,
                   gridSetData->atomIndices.data() + atomOffset, numAtoms, x,
                   dimensions_.lowerCorner[ZZ],
                   1.0/dimensions_.gridSize[ZZ], numCellsZ*numAtomsPerCell,
                   sort_work);

        /* Fill the ncz cells in this column */
        const int firstCell  = firstCellInColumn(cxy);
        int       cellFilled = firstCell;
        for (int cellZ = 0; cellZ < numCellsZ; cellZ++)
        {
            const int cell           = firstCell + cellZ;

            const int atomOffsetCell = atomOffset + cellZ*numAtomsPerCell;
            const int numAtomsCell   = std::min(numAtomsPerCell, numAtoms - (atomOffsetCell - atomOffset));

            fillCell(gridSetData, nbat,
                     atomOffsetCell, atomOffsetCell + numAtomsCell, atinfo, x,
                     nullptr);

            /* This copy to bbcz is not really necessary.
             * But it allows to use the same grid search code
             * for the simple and supersub cell setups.
             */
            if (numAtomsCell > 0)
            {
                cellFilled = cell;
            }
            bbcz_[cell].lower = bb_[cellFilled].lower.z;
            bbcz_[cell].upper = bb_[cellFilled].upper.z;
        }

        /* Set the unused atom indices to -1 */
        for (int ind = numAtoms; ind < numCellsZ*numAtomsPerCell; ind++)
        {
            gridSetData->atomIndices[atomOffset + ind] = -1;
        }
    }
}
/* Spatially sort the atoms within one grid column */
void Grid::sortColumnsGpuGeometry(GridSetData *gridSetData,
                                  int dd_zone,
                                  int atomStart, int atomEnd,
                                  const int *atinfo,
                                  gmx::ArrayRef<const gmx::RVec> x,
                                  nbnxn_atomdata_t *nbat,
                                  int cxy_start, int cxy_end,
                                  gmx::ArrayRef<int> sort_work)
{
    BoundingBox  bb_work_array[2];
    BoundingBox *bb_work_aligned = reinterpret_cast<BoundingBox *>((reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));

    if (debug)
    {
        fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
                cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);
    }

    const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);

    const int  numAtomsPerCell = geometry_.numAtomsPerCell;

    const int  subdiv_x = geometry_.numAtomsICluster;
    const int  subdiv_y = c_gpuNumClusterPerCellX*subdiv_x;
    const int  subdiv_z = c_gpuNumClusterPerCellY*subdiv_y;

    /* Extract the atom index array that will be filled here */
    const gmx::ArrayRef<int> &atomIndices = gridSetData->atomIndices;

    /* Sort the atoms within each x,y column in 3 dimensions.
     * Loop over all columns on the x/y grid.
     */
    for (int cxy = cxy_start; cxy < cxy_end; cxy++)
    {
        const int gridX            = cxy/dimensions_.numCells[YY];
        const int gridY            = cxy - gridX*dimensions_.numCells[YY];

        const int numAtomsInColumn = cxy_na_[cxy];
        const int numCellsInColumn = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
        const int atomOffset       = firstAtomInColumn(cxy);

        /* Sort the atoms within each x,y column on z coordinate */
        sort_atoms(ZZ, FALSE, dd_zone,
                   relevantAtomsAreWithinGridBounds,
                   atomIndices.data() + atomOffset, numAtomsInColumn, x,
                   dimensions_.lowerCorner[ZZ],
                   1.0/dimensions_.gridSize[ZZ], numCellsInColumn*numAtomsPerCell,
                   sort_work);

        /* This loop goes over the cells and clusters along z at once */
        for (int sub_z = 0; sub_z < numCellsInColumn*c_gpuNumClusterPerCellZ; sub_z++)
        {
            const int atomOffsetZ = atomOffset + sub_z*subdiv_z;
            const int numAtomsZ   = std::min(subdiv_z, numAtomsInColumn - (atomOffsetZ - atomOffset));
            int       cz          = -1;
            /* We have already sorted on z */

            if (sub_z % c_gpuNumClusterPerCellZ == 0)
            {
                cz = sub_z/c_gpuNumClusterPerCellZ;
                const int cell = cxy_ind_[cxy] + cz;

                /* The number of atoms in this cell/super-cluster */
                const int numAtoms = std::min(numAtomsPerCell, numAtomsInColumn - (atomOffsetZ - atomOffset));

                numClusters_[cell] = std::min(c_gpuNumClusterPerCell,
                                              (numAtoms + geometry_.numAtomsICluster - 1)/ geometry_.numAtomsICluster);

                /* Store the z-boundaries of the bounding box of the cell */
                bbcz_[cell].lower = x[atomIndices[atomOffsetZ]][ZZ];
                bbcz_[cell].upper = x[atomIndices[atomOffsetZ + numAtoms - 1]][ZZ];
            }

            if (c_gpuNumClusterPerCellY > 1)
            {
                /* Sort the atoms along y */
                sort_atoms(YY, (sub_z & 1) != 0, dd_zone,
                           relevantAtomsAreWithinGridBounds,
                           atomIndices.data() + atomOffsetZ, numAtomsZ, x,
                           dimensions_.lowerCorner[YY] + gridY*dimensions_.cellSize[YY],
                           dimensions_.invCellSize[YY], subdiv_z,
                           sort_work);
            }

            for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++)
            {
                const int atomOffsetY = atomOffsetZ + sub_y*subdiv_y;
                const int numAtomsY   = std::min(subdiv_y, numAtomsInColumn - (atomOffsetY - atomOffset));

                if (c_gpuNumClusterPerCellX > 1)
                {
                    /* Sort the atoms along x */
                    sort_atoms(XX, ((cz*c_gpuNumClusterPerCellY + sub_y) & 1) != 0, dd_zone,
                               relevantAtomsAreWithinGridBounds,
                               atomIndices.data() + atomOffsetY, numAtomsY, x,
                               dimensions_.lowerCorner[XX] + gridX*dimensions_.cellSize[XX],
                               dimensions_.invCellSize[XX], subdiv_y,
                               sort_work);
                }

                for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)
                {
                    const int atomOffsetX = atomOffsetY + sub_x*subdiv_x;
                    const int numAtomsX   = std::min(subdiv_x, numAtomsInColumn - (atomOffsetX - atomOffset));

                    fillCell(gridSetData, nbat,
                             atomOffsetX, atomOffsetX + numAtomsX, atinfo, x,
                             bb_work_aligned);
                }
            }
        }

        /* Set the unused atom indices to -1 */
        for (int ind = numAtomsInColumn; ind < numCellsInColumn*numAtomsPerCell; ind++)
        {
            atomIndices[atomOffset + ind] = -1;
        }
    }
}
/*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
static void setCellAndAtomCount(gmx::ArrayRef<int> cell,
                                int                cellIndex,
                                gmx::ArrayRef<int> cxy_na,
                                int                atomIndex)
{
    cell[atomIndex]    = cellIndex;
    cxy_na[cellIndex] += 1;
}
void Grid::calcColumnIndices(const Grid::Dimensions         &gridDims,
                             const gmx::UpdateGroupsCog     *updateGroupsCog,
                             const int                       atomStart,
                             const int                       atomEnd,
                             gmx::ArrayRef<const gmx::RVec>  x,
                             const int                       dd_zone,
                             const int                      *move,
                             const int                       thread,
                             const int                       nthread,
                             gmx::ArrayRef<int>              cell,
                             gmx::ArrayRef<int>              cxy_na)
{
    const int numColumns = gridDims.numCells[XX]*gridDims.numCells[YY];

    /* We add one extra cell for particles which moved during DD */
    for (int i = 0; i < numColumns; i++)
    {
        cxy_na[i] = 0;
    }

    int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
    int taskAtomEnd   = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;

    if (dd_zone == 0)
    {
        /* Home zone */
        for (int i = taskAtomStart; i < taskAtomEnd; i++)
        {
            if (move == nullptr || move[i] >= 0)
            {

                const gmx::RVec &coord = (updateGroupsCog ? updateGroupsCog->cogForAtom(i) : x[i]);

                /* We need to be careful with rounding,
                 * particles might be a few bits outside the local zone.
                 * The int cast takes care of the lower bound,
                 * we will explicitly take care of the upper bound.
                 */
                int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
                int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

#ifndef NDEBUG
                if (cx < 0 || cx > gridDims.numCells[XX] ||
                    cy < 0 || cy > gridDims.numCells[YY])
                {
                    gmx_fatal(FARGS,
                              "grid cell cx %d cy %d out of range (max %d %d)\n"
                              "atom %f %f %f, grid->c0 %f %f",
                              cx, cy,
                              gridDims.numCells[XX],
                              gridDims.numCells[YY],
                              x[i][XX], x[i][YY], x[i][ZZ],
                              gridDims.lowerCorner[XX],
                              gridDims.lowerCorner[YY]);
                }
#endif
                /* Take care of potential rounding issues */
                cx = std::min(cx, gridDims.numCells[XX] - 1);
                cy = std::min(cy, gridDims.numCells[YY] - 1);

                /* For the moment cell will contain only the, grid local,
                 * x and y indices, not z.
                 */
                setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
            }
            else
            {
                /* Put this moved particle after the end of the grid,
                 * so we can process it later without using conditionals.
                 */
                setCellAndAtomCount(cell, numColumns, cxy_na, i);
            }
        }
    }
    else
    {
        /* Non-home zone */
        for (int i = taskAtomStart; i < taskAtomEnd; i++)
        {
            int cx = static_cast<int>((x[i][XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
            int cy = static_cast<int>((x[i][YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

            /* For non-home zones there could be particles outside
             * the non-bonded cut-off range, which have been communicated
             * for bonded interactions only. For the result it doesn't
             * matter where these end up on the grid. For performance
             * we put them in an extra row at the border.
             */
            cx = std::max(cx, 0);
            cx = std::min(cx, gridDims.numCells[XX] - 1);
            cy = std::max(cy, 0);
            cy = std::min(cy, gridDims.numCells[YY] - 1);

            /* For the moment cell will contain only the, grid local,
             * x and y indices, not z.
             */
            setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
        }
    }
}
/*! \brief Resizes grid and atom data which depend on the number of cells */
static void resizeForNumberOfCells(const int         numNbnxnAtoms,
                                   const int         numAtomsMoved,
                                   GridSetData      *gridSetData,
                                   nbnxn_atomdata_t *nbat)
{
    /* Note: gridSetData->cellIndices was already resized before */

    /* To avoid conditionals we store the moved particles at the end of
     * the atom index array, so make sure we have enough space.
     */
    gridSetData->atomIndices.resize(numNbnxnAtoms + numAtomsMoved);

    /* Make space in nbat for storing the atom coordinates */
    nbat->resizeCoordinateBuffer(numNbnxnAtoms);
}
void Grid::setCellIndices(int                             ddZone,
                          int                             cellOffset,
                          GridSetData                    *gridSetData,
                          gmx::ArrayRef<GridWork>         gridWork,
                          int                             atomStart,
                          int                             atomEnd,
                          const int                      *atinfo,
                          gmx::ArrayRef<const gmx::RVec>  x,
                          const int                       numAtomsMoved,
                          nbnxn_atomdata_t               *nbat)
{
    cellOffset_   = cellOffset;

    srcAtomBegin_ = atomStart;
    srcAtomEnd_   = atomEnd;

    const int nthread = gmx_omp_nthreads_get(emntPairsearch);

    const int numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Make the cell index as a function of x and y */
    int ncz_max = 0;
    int ncz     = 0;
    cxy_ind_[0] = 0;
    for (int i = 0; i < numColumns() + 1; i++)
    {
        /* We set ncz_max at the beginning of the loop instead of at the end
         * to skip i=grid->ncx*grid->numCells[YY] which are moved particles
         * that do not need to be ordered on the grid.
         */
        if (ncz > ncz_max)
        {
            ncz_max = ncz;
        }
        int cxy_na_i = gridWork[0].numAtomsPerColumn[i];
        for (int thread = 1; thread < nthread; thread++)
        {
            cxy_na_i += gridWork[thread].numAtomsPerColumn[i];
        }
        ncz = (cxy_na_i + numAtomsPerCell - 1)/numAtomsPerCell;
        if (nbat->XFormat == nbatX8)
        {
            /* Make the number of cells a multiple of 2 */
            ncz = (ncz + 1) & ~1;
        }
        cxy_ind_[i+1] = cxy_ind_[i] + ncz;
        /* Clear cxy_na_, so we can reuse the array below */
        cxy_na_[i] = 0;
    }
    numCellsTotal_     = cxy_ind_[numColumns()] - cxy_ind_[0];
    numCellsColumnMax_ = ncz_max;

    /* Resize grid and atom data which depend on the number of cells */
    resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, gridSetData, nbat);

    if (debug)
    {
        fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
                numAtomsPerCell, geometry_.numAtomsICluster, numCellsTotal_,
                dimensions_.numCells[XX], dimensions_.numCells[YY],
                numCellsTotal_/(static_cast<double>(numColumns())),
                ncz_max);
        if (gmx_debug_at)
        {
            int i = 0;
            for (int cy = 0; cy < dimensions_.numCells[YY]; cy++)
            {
                for (int cx = 0; cx < dimensions_.numCells[XX]; cx++)
                {
                    fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]);
                    i++;
                }
                fprintf(debug, "\n");
            }
        }
    }

    /* Make sure the work array for sorting is large enough */
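    /* The largest column holds at most ncz_max*numAtomsPerCell atoms, and
     * sort_atoms() needs c_sortGridRatio holes per expected atom plus room
     * for all atoms in the last hole, hence the factor c_sortGridMaxSizeFactor.
     */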
    const int worstCaseSortBufferSize = ncz_max*numAtomsPerCell*c_sortGridMaxSizeFactor;
    if (worstCaseSortBufferSize > gmx::index(gridWork[0].sortBuffer.size()))
    {
        for (GridWork &work : gridWork)
        {
            /* Elements not in use should be -1 */
            work.sortBuffer.resize(worstCaseSortBufferSize, -1);
        }
    }

    /* Now we know the dimensions, we can fill the grid.
     * This is the first, unsorted fill. We sort the columns after this.
     */
    gmx::ArrayRef<int> cells       = gridSetData->cells;
    gmx::ArrayRef<int> atomIndices = gridSetData->atomIndices;
    for (int i = atomStart; i < atomEnd; i++)
    {
        /* At this point nbs->cell contains the local grid x,y indices */
        const int cxy = cells[i];
        atomIndices[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i;
    }

    if (ddZone == 0)
    {
        /* Set the cell indices for the moved particles */
        int n0 = numCellsTotal_*numAtomsPerCell;
        int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()];
        for (int i = n0; i < n1; i++)
        {
            cells[atomIndices[i]] = i;
        }
    }

    /* Sort the super-cell columns along z into the sub-cells. */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            int columnStart = ((thread + 0)*numColumns())/nthread;
            int columnEnd   = ((thread + 1)*numColumns())/nthread;
            if (geometry_.isSimple)
            {
                sortColumnsCpuGeometry(gridSetData, ddZone,
                                       atomStart, atomEnd, atinfo, x, nbat,
                                       columnStart, columnEnd,
                                       gridWork[thread].sortBuffer);
            }
            else
            {
                sortColumnsGpuGeometry(gridSetData, ddZone,
                                       atomStart, atomEnd, atinfo, x, nbat,
                                       columnStart, columnEnd,
                                       gridWork[thread].sortBuffer);
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    if (geometry_.isSimple && nbat->XFormat == nbatX8)
    {
        combine_bounding_box_pairs(*this, bb_, bbj_);
    }

    if (!geometry_.isSimple)
    {
        numClustersTotal_ = 0;
        for (int i = 0; i < numCellsTotal_; i++)
        {
            numClustersTotal_ += numClusters_[i];
        }
    }

    if (debug)
    {
        if (geometry_.isSimple)
        {
            print_bbsizes_simple(debug, *this);
        }
        else
        {
            fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",
                    numClustersTotal_,
                    (atomEnd - atomStart)/static_cast<double>(numClustersTotal_));

            print_bbsizes_supersub(debug, *this);
        }
    }
}

} // namespace Nbnxm