/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Implements the Grid class.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_nbnxm
 */
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/mdlib/gmx_omp_nthreads.h"
57 #include "gromacs/mdlib/updategroupscog.h"
58 #include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
59 #include "gromacs/nbnxm/atomdata.h"
60 #include "gromacs/nbnxm/nbnxm_geometry.h"
61 #include "gromacs/simd/simd.h"
62 #include "gromacs/simd/vector_operations.h"
64 #include "boundingboxes.h"
65 #include "gridsetdata.h"
66 #include "pairlistparams.h"

Grid::Geometry::Geometry(const PairlistType pairlistType) :
    isSimple(pairlistType != PairlistType::HierarchicalNxN),
    numAtomsICluster(IClusterSizePerListType[pairlistType]),
    numAtomsJCluster(JClusterSizePerListType[pairlistType]),
    numAtomsPerCell((isSimple ? 1 : c_gpuNumClusterPerCell)*numAtomsICluster),
    numAtomsICluster2Log(get_2log(numAtomsICluster))
{
}

Grid::Grid(const PairlistType  pairlistType,
           const bool         &haveFep) :
    geometry_(pairlistType),
    haveFep_(haveFep)
{
}

/*! \brief Returns the atom density (> 0) of a rectangular grid */
static real gridAtomDensity(int        numAtoms,
                            const rvec lowerCorner,
                            const rvec upperCorner)
{
    rvec size;

    if (numAtoms == 0)
    {
        /* To avoid zero density we use a minimum of 1 atom */
        numAtoms = 1;
    }

    rvec_sub(upperCorner, lowerCorner, size);

    return static_cast<real>(numAtoms)/(size[XX]*size[YY]*size[ZZ]);
}
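
/* Illustrative example: 3000 atoms in a 3 nm x 3 nm x 3 nm grid give a
 * density of 3000/27 ~= 111 atoms/nm^3; the 1-atom minimum above only
 * matters for empty grids, where it keeps the density finite.
 */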

void Grid::setDimensions(const int           ddZone,
                         const int           numAtoms,
                         gmx::RVec           lowerCorner,
                         gmx::RVec           upperCorner,
                         real                atomDensity,
                         const real          maxAtomGroupRadius,
                         const bool          haveFep,
                         gmx::PinningPolicy  pinningPolicy)
{
    /* We allow passing lowerCorner=upperCorner, in which case we need to
     * create a finite sized bounding box to avoid division by zero.
     * We use a minimum size such that the volume fits in float with some
     * margin for computing and using the atom number density.
     */
    constexpr real c_minimumGridSize = 1e-10;
    for (int d = 0; d < DIM; d++)
    {
        GMX_ASSERT(upperCorner[d] >= lowerCorner[d], "Upper corner should be larger than the lower corner");
        if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize)
        {
            /* Ensure we apply a correction to the bounding box */
            real correction = std::max(std::abs(lowerCorner[d])*GMX_REAL_EPS,
                                       0.5_real*c_minimumGridSize);
            lowerCorner[d] -= correction;
            upperCorner[d] += correction;
        }
    }

    /* For the home zone we compute the density when not set (=-1) or when =0 */
    if (ddZone == 0 && atomDensity <= 0)
    {
        atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner);
    }

    dimensions_.atomDensity        = atomDensity;
    dimensions_.maxAtomGroupRadius = maxAtomGroupRadius;

    rvec size;
    rvec_sub(upperCorner, lowerCorner, size);

    if (numAtoms > geometry_.numAtomsPerCell)
    {
        GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");

        /* Target cell length */
        real tlen_x;
        real tlen_y;
        if (geometry_.isSimple)
        {
            /* To minimize the number of zero interactions, we should make
             * the largest of the i/j cells cubic.
             */
            int numAtomsInCell = std::max(geometry_.numAtomsICluster,
                                          geometry_.numAtomsJCluster);

            /* Approximately cubic cells */
            real tlen = std::cbrt(numAtomsInCell/atomDensity);
            tlen_x    = tlen;
            tlen_y    = tlen;
        }
        else
        {
            /* Approximately cubic sub cells */
            real tlen = std::cbrt(geometry_.numAtomsICluster/atomDensity);
            tlen_x    = tlen*c_gpuNumClusterPerCellX;
            tlen_y    = tlen*c_gpuNumClusterPerCellY;
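
            /* Illustrative numbers: at a liquid-water-like density of
             * ~100 atoms/nm^3 with 8-atom i-clusters, tlen = cbrt(8/100)
             * ~= 0.43 nm, and with two clusters per cell along x the
             * target length tlen_x would be ~0.86 nm.
             */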
        }
        /* We round ncx and ncy down, because we get fewer cell pairs
         * in the pairlist when the fixed cell dimensions (x,y) are
         * larger than the variable one (z) than the other way around.
         */
        dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
        dimensions_.numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));
    }
    else
    {
        dimensions_.numCells[XX] = 1;
        dimensions_.numCells[YY] = 1;
    }

    for (int d = 0; d < DIM - 1; d++)
    {
        dimensions_.cellSize[d]    = size[d]/dimensions_.numCells[d];
        dimensions_.invCellSize[d] = 1/dimensions_.cellSize[d];
    }

    if (ddZone > 0)
    {
        /* This is a non-home zone, add an extra row of cells
         * for particles communicated for bonded interactions.
         * These can be beyond the cut-off. It doesn't matter where
         * they end up on the grid, but for performance it's better
         * if they don't end up in cells that can be within cut-off range.
         */
        dimensions_.numCells[XX]++;
        dimensions_.numCells[YY]++;
    }

    /* We need one additional cell entry for particles moved by DD */
    cxy_na_.resize(numColumns() + 1);
    cxy_ind_.resize(numColumns() + 2);
    changePinningPolicy(&cxy_na_, pinningPolicy);
    changePinningPolicy(&cxy_ind_, pinningPolicy);

    /* Worst case scenario of 1 atom in each last cell */
    int maxNumCells;
    if (geometry_.numAtomsJCluster <= geometry_.numAtomsICluster)
    {
        maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns();
    }
    else
    {
        maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns()*geometry_.numAtomsJCluster/geometry_.numAtomsICluster;
    }

    if (!geometry_.isSimple)
    {
        numClusters_.resize(maxNumCells);
    }
    bbcz_.resize(maxNumCells);

    /* This resize also zeroes the contents, which avoids possible
     * floating-point exceptions in SIMD with the unused bb elements.
     */
    if (geometry_.isSimple)
    {
        bb_.resize(maxNumCells);
    }
    else
    {
#if NBNXN_BBXXXX
        pbb_.resize(packedBoundingBoxesIndex(maxNumCells*c_gpuNumClusterPerCell));
#else
        bb_.resize(maxNumCells*c_gpuNumClusterPerCell);
#endif
    }

    if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
    {
        bbj_ = bb_;
    }
    else
    {
        GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes");

        bbjStorage_.resize(maxNumCells*geometry_.numAtomsICluster/geometry_.numAtomsJCluster);
        bbj_ = bbjStorage_;
    }

    flags_.resize(maxNumCells);
    if (haveFep)
    {
        fep_.resize(maxNumCells*geometry_.numAtomsPerCell/geometry_.numAtomsICluster);
    }

    copy_rvec(lowerCorner, dimensions_.lowerCorner);
    copy_rvec(upperCorner, dimensions_.upperCorner);
    copy_rvec(size,        dimensions_.gridSize);
}

/* We need to sort particles in grid columns on the z-coordinate.
 * As particles are very often distributed homogeneously, we use a sorting
 * algorithm similar to pigeonhole sort. We multiply the z-coordinate
 * by a factor, cast to an int and try to store in that hole. If the hole
 * is full, we move this or another particle. A second pass is needed to make
 * the elements contiguous. c_sortGridRatio is the ratio of holes to particles.
 * 4 is the optimal value for a homogeneous particle distribution and allows
 * for an O(#particles) sort up to distributions where all particles are
 * concentrated in 1/4 of the space. No NlogN fallback is implemented,
 * as it can be expensive to detect inhomogeneous particle distributions.
 */

/*! \brief Ratio of grid cells to atoms */
static constexpr int c_sortGridRatio         = 4;
/*! \brief Maximum ratio of holes used, in the worst case all particles
 * end up in the last hole and we need #atoms extra holes at the end.
 */
static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1;
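
/* Illustrative example: with n = n_per_h = 8 atoms in a sorting range of
 * height h, there are 8*c_sortGridRatio = 32 primary holes; an atom at
 * relative height 0.6 first tries hole (int)(0.6*32) = 19 and moves up
 * if that hole is already occupied.
 */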

/*! \brief Sorts particle index a on coordinates x along dim.
 *
 * Backwards tells if we want decreasing instead of increasing coordinates.
 * h0 is the minimum of the coordinate range.
 * invh is 1/(length of the sorting range).
 * n_per_h (>=n) is the expected average number of particles per 1/invh.
 * sort is the sorting work array.
 * sort should have a size of at least n_per_h*c_sortGridRatio + n,
 * or easier, allocate at least n*c_sortGridMaxSizeFactor elements.
 */
static void sort_atoms(int dim, gmx_bool Backwards,
                       int gmx_unused dd_zone,
                       bool gmx_unused relevantAtomsAreWithinGridBounds,
                       int *a, int n,
                       gmx::ArrayRef<const gmx::RVec> x,
                       real h0, real invh, int n_per_h,
                       gmx::ArrayRef<int> sort)
{
    GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");

    /* Transform the inverse range height into the inverse hole height */
    invh *= n_per_h*c_sortGridRatio;

    /* Set nsort to the maximum possible number of holes used.
     * In the worst case all n elements end up in the last bin.
     */
    int nsort = n_per_h*c_sortGridRatio + n;

    /* Determine the index range used, so we can limit it for the second pass */
    int zi_min = INT_MAX;
    int zi_max = -1;

    /* Sort the particles using a simple index sort */
    for (int i = 0; i < n; i++)
    {
        /* The cast takes care of floating-point rounding effects below zero.
         * This code assumes particles are less than 1/c_sortGridRatio
         * times the box height out of the box.
         */
        int zi = static_cast<int>((x[a[i]][dim] - h0)*invh);

        /* As we can have rounding effects, we use > instead of >= here */
        if (relevantAtomsAreWithinGridBounds &&
            (zi < 0 || (dd_zone == 0 && zi > n_per_h*c_sortGridRatio)))
        {
            gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
                      a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi,
                      n_per_h, c_sortGridRatio);
        }

        /* In a non-local domain, particles communicated for bonded interactions
         * can be far beyond the grid size, which is set by the non-bonded
         * cut-off distance. We sort such particles into the last cell.
         */
        if (zi > n_per_h*c_sortGridRatio)
        {
            zi = n_per_h*c_sortGridRatio;
        }

        /* Ideally this particle should go in sort cell zi,
         * but that might already be in use,
         * in that case find the first empty cell higher up.
         */
        if (sort[zi] < 0)
        {
            sort[zi] = a[i];
            zi_min   = std::min(zi_min, zi);
            zi_max   = std::max(zi_max, zi);
        }
        else
        {
            /* We have multiple atoms in the same sorting slot.
             * Sort on real z for minimal bounding box size.
             * There is an extra check for identical z to ensure
             * well-defined output order, independent of input order,
             * to ensure binary reproducibility after restarts.
             */
            while (sort[zi] >= 0 && ( x[a[i]][dim] >  x[sort[zi]][dim] ||
                                      (x[a[i]][dim] == x[sort[zi]][dim] &&
                                       a[i] > sort[zi])))
            {
                zi++;
            }

            if (sort[zi] >= 0)
            {
                /* Shift all elements by one slot until we find an empty slot */
                int cp  = sort[zi];
                int zim = zi + 1;
                while (sort[zim] >= 0)
                {
                    int tmp   = sort[zim];
                    sort[zim] = cp;
                    cp        = tmp;
                    zim++;
                }
                sort[zim] = cp;
                zi_max    = std::max(zi_max, zim);
            }
            sort[zi] = a[i];
            zi_max   = std::max(zi_max, zi);
            zi_min   = std::min(zi_min, zi);
        }
    }

    /* Pull the particles out of the sort buffer in order */
    int c = 0;
    if (!Backwards)
    {
        for (int zi = 0; zi < nsort; zi++)
        {
            if (sort[zi] >= 0)
            {
                a[c++]   = sort[zi];
                sort[zi] = -1;
            }
        }
    }
    else
    {
        for (int zi = zi_max; zi >= zi_min; zi--)
        {
            if (sort[zi] >= 0)
            {
                a[c++]   = sort[zi];
                sort[zi] = -1;
            }
        }
    }
    if (c < n)
    {
        gmx_incons("Lost particles while sorting");
    }
}

#if GMX_DOUBLE
//! Returns double up to one least significant float bit smaller than x
static double R2F_D(const float x)
{
    return static_cast<float>(x >= 0 ? (1 - GMX_FLOAT_EPS)*x : (1 + GMX_FLOAT_EPS)*x);
}
//! Returns double up to one least significant float bit larger than x
static double R2F_U(const float x)
{
    return static_cast<float>(x >= 0 ? (1 + GMX_FLOAT_EPS)*x : (1 - GMX_FLOAT_EPS)*x);
}
#else
//! Returns x
static float R2F_D(const float x)
{
    return x;
}
//! Returns x
static float R2F_U(const float x)
{
    return x;
}
#endif
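
/* The R2F helpers above round conservatively when converting (possibly
 * double-precision) coordinates to float bounding-box corners: lower
 * corners are nudged down by about one float epsilon and upper corners up
 * (e.g. R2F_D(1.0) ~= 1.0 - GMX_FLOAT_EPS), so a box can never be smaller
 * than the coordinates it encloses.
 */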

//! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0
static void calc_bounding_box(int na, int stride, const real *x,
                              BoundingBox *bb)
{
    int  i;
    real xl, xh, yl, yh, zl, zh;

    i  = 0;
    xl = x[i+XX];
    xh = x[i+XX];
    yl = x[i+YY];
    yh = x[i+YY];
    zl = x[i+ZZ];
    zh = x[i+ZZ];
    i += stride;
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);
        i += stride;
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}

/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
static void calc_bounding_box_x_x4(int na, const real *x,
                                   BoundingBox *bb)
{
    real xl, xh, yl, yh, zl, zh;

    xl = x[XX*c_packX4];
    xh = x[XX*c_packX4];
    yl = x[YY*c_packX4];
    yh = x[YY*c_packX4];
    zl = x[ZZ*c_packX4];
    zh = x[ZZ*c_packX4];
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[j+XX*c_packX4]);
        xh = std::max(xh, x[j+XX*c_packX4]);
        yl = std::min(yl, x[j+YY*c_packX4]);
        yh = std::max(yh, x[j+YY*c_packX4]);
        zl = std::min(zl, x[j+ZZ*c_packX4]);
        zh = std::max(zh, x[j+ZZ*c_packX4]);
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}
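
/* In the packed x4 layout, coordinates are stored in blocks of c_packX4
 * atoms as x0...x3 y0...y3 z0...z3, so atom j's x-coordinate is found at
 * x[j + XX*c_packX4]; the x8 variant below works the same way with blocks
 * of c_packX8 atoms.
 */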

/*! \brief Computes the bounding box for na coordinates, bb order xyz0 */
static void calc_bounding_box_x_x8(int na, const real *x,
                                   BoundingBox *bb)
{
    real xl, xh, yl, yh, zl, zh;

    xl = x[XX*c_packX8];
    xh = x[XX*c_packX8];
    yl = x[YY*c_packX8];
    yh = x[YY*c_packX8];
    zl = x[ZZ*c_packX8];
    zh = x[ZZ*c_packX8];
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[j+XX*c_packX8]);
        xh = std::max(xh, x[j+XX*c_packX8]);
        yl = std::min(yl, x[j+YY*c_packX8]);
        yh = std::max(yh, x[j+YY*c_packX8]);
        zl = std::min(zl, x[j+ZZ*c_packX8]);
        zh = std::max(zh, x[j+ZZ*c_packX8]);
    }
    /* Note: possible double to float conversion here */
    bb->lower.x = R2F_D(xl);
    bb->lower.y = R2F_D(yl);
    bb->lower.z = R2F_D(zl);
    bb->upper.x = R2F_U(xh);
    bb->upper.y = R2F_U(yh);
    bb->upper.z = R2F_U(zh);
}

/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x,
                                                     BoundingBox *bb,
                                                     BoundingBox *bbj)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    calc_bounding_box_x_x4(std::min(na, 2), x, bbj);

    if (na > 2)
    {
        calc_bounding_box_x_x4(std::min(na-2, 2), x+(c_packX4>>1), bbj+1);
    }
    else
    {
        /* Set the "empty" bounding box to the same as the first one,
         * so we don't need to treat special cases in the rest of the code.
         */
#if NBNXN_SEARCH_BB_SIMD4
        store4(bbj[1].lower.ptr(), load4(bbj[0].lower.ptr()));
        store4(bbj[1].upper.ptr(), load4(bbj[0].upper.ptr()));
#else
        bbj[1] = bbj[0];
#endif
    }

#if NBNXN_SEARCH_BB_SIMD4
    store4(bb->lower.ptr(),
           min(load4(bbj[0].lower.ptr()), load4(bbj[1].lower.ptr())));
    store4(bb->upper.ptr(),
           max(load4(bbj[0].upper.ptr()), load4(bbj[1].upper.ptr())));
#else
    bb->lower = BoundingBox::Corner::min(bbj[0].lower,
                                         bbj[1].lower);
    bb->upper = BoundingBox::Corner::max(bbj[0].upper,
                                         bbj[1].upper);
#endif
}

#if NBNXN_BBXXXX

/*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
{
    int  i;
    real xl, xh, yl, yh, zl, zh;

    i  = 0;
    xl = x[i+XX];
    xh = x[i+XX];
    yl = x[i+YY];
    yh = x[i+YY];
    zl = x[i+ZZ];
    zh = x[i+ZZ];
    i += stride;
    for (int j = 1; j < na; j++)
    {
        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);
        i += stride;
    }
    /* Note: possible double to float conversion here */
    bb[0*c_packedBoundingBoxesDimSize] = R2F_D(xl);
    bb[1*c_packedBoundingBoxesDimSize] = R2F_D(yl);
    bb[2*c_packedBoundingBoxesDimSize] = R2F_D(zl);
    bb[3*c_packedBoundingBoxesDimSize] = R2F_U(xh);
    bb[4*c_packedBoundingBoxesDimSize] = R2F_U(yh);
    bb[5*c_packedBoundingBoxesDimSize] = R2F_U(zh);
}

#endif /* NBNXN_BBXXXX */

#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */
static void calc_bounding_box_simd4(int na, const float *x,
                                    BoundingBox *bb)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    static_assert(sizeof(BoundingBox::Corner) == GMX_SIMD4_WIDTH*sizeof(float),
                  "The Corner struct should hold exactly 4 floats");

    Simd4Float bb_0_S = load4(x);
    Simd4Float bb_1_S = bb_0_S;

    for (int i = 1; i < na; i++)
    {
        Simd4Float x_S = load4(x + i*GMX_SIMD4_WIDTH);
        bb_0_S         = min(bb_0_S, x_S);
        bb_1_S         = max(bb_1_S, x_S);
    }

    store4(bb->lower.ptr(), bb_0_S);
    store4(bb->upper.ptr(), bb_1_S);
}

#if NBNXN_BBXXXX

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx_simd4(int na, const float *x,
                                         BoundingBox *bb_work_aligned,
                                         real *bb)
{
    calc_bounding_box_simd4(na, x, bb_work_aligned);

    bb[0*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.x;
    bb[1*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.y;
    bb[2*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.z;
    bb[3*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.x;
    bb[4*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.y;
    bb[5*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.z;
}

#endif /* NBNXN_BBXXXX */

#endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
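
/* Two bounding-box layouts are used by the functions above: plain xyz0,
 * where each corner is one 4-float BoundingBox::Corner, and the packed
 * xxxxyyyyzzzz layout (under NBNXN_BBXXXX), where groups of
 * c_packedBoundingBoxesDimSize boxes store each component contiguously,
 * so SIMD4 kernels can load one component of several boxes at once.
 */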

/*! \brief Combines pairs of consecutive bounding boxes */
static void combine_bounding_box_pairs(const Grid                      &grid,
                                       gmx::ArrayRef<const BoundingBox> bb,
                                       gmx::ArrayRef<BoundingBox>       bbj)
{
    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
    using namespace gmx;

    for (int i = 0; i < grid.numColumns(); i++)
    {
        /* Starting bb in a column is expected to be 2-aligned */
        const int sc2 = grid.firstCellInColumn(i) >> 1;
        /* For odd numbers skip the last bb here */
        const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1);
        int       c2;
        for (c2 = sc2; c2 < sc2 + nc2; c2++)
        {
#if NBNXN_SEARCH_BB_SIMD4
            Simd4Float min_S, max_S;

            min_S = min(load4(bb[c2*2+0].lower.ptr()),
                        load4(bb[c2*2+1].lower.ptr()));
            max_S = max(load4(bb[c2*2+0].upper.ptr()),
                        load4(bb[c2*2+1].upper.ptr()));
            store4(bbj[c2].lower.ptr(), min_S);
            store4(bbj[c2].upper.ptr(), max_S);
#else
            bbj[c2].lower = BoundingBox::Corner::min(bb[c2*2 + 0].lower,
                                                     bb[c2*2 + 1].lower);
            bbj[c2].upper = BoundingBox::Corner::max(bb[c2*2 + 0].upper,
                                                     bb[c2*2 + 1].upper);
#endif
        }
        if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)
        {
            /* The bb count in this column is odd: duplicate the last bb */
            bbj[c2].lower = bb[c2*2].lower;
            bbj[c2].upper = bb[c2*2].upper;
        }
    }
}
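
/* The pairing above serves simple grids where j-clusters are twice the
 * i-cluster size (e.g. 4-atom i-clusters with 8-atom j-clusters): each
 * bbj entry is then the union of two consecutive i-cluster bounding boxes.
 */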

/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_simple(FILE       *fp,
                                 const Grid &grid)
{
    dvec ba;

    clear_dvec(ba);
    for (int c = 0; c < grid.numCells(); c++)
    {
        const BoundingBox &bb = grid.iBoundingBoxes()[c];
        ba[XX] += bb.upper.x - bb.lower.x;
        ba[YY] += bb.upper.y - bb.lower.y;
        ba[ZZ] += bb.upper.z - bb.lower.z;
    }
    dsvmul(1.0/grid.numCells(), ba, ba);

    const Grid::Dimensions &dims         = grid.dimensions();
    real                    avgCellSizeZ = (dims.atomDensity > 0 ? grid.geometry().numAtomsICluster/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
            dims.cellSize[XX],
            dims.cellSize[YY],
            avgCellSizeZ,
            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*dims.invCellSize[XX],
            ba[YY]*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
}

/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_supersub(FILE       *fp,
                                   const Grid &grid)
{
    int  ns = 0;
    dvec ba;

    clear_dvec(ba);
    for (int c = 0; c < grid.numCells(); c++)
    {
#if NBNXN_BBXXXX
        for (int s = 0; s < grid.numClustersPerCell()[c]; s += c_packedBoundingBoxesDimSize)
        {
            int  cs_w          = (c*c_gpuNumClusterPerCell + s)/c_packedBoundingBoxesDimSize;
            auto boundingBoxes = grid.packedBoundingBoxes().subArray(cs_w*c_packedBoundingBoxesSize, c_packedBoundingBoxesSize);
            for (int i = 0; i < c_packedBoundingBoxesDimSize; i++)
            {
                for (int d = 0; d < DIM; d++)
                {
                    ba[d] +=
                        boundingBoxes[(DIM + d)*c_packedBoundingBoxesDimSize + i] -
                        boundingBoxes[(0   + d)*c_packedBoundingBoxesDimSize + i];
                }
            }
        }
#else
        for (int s = 0; s < grid.numClustersPerCell()[c]; s++)
        {
            const BoundingBox &bb = grid.iBoundingBoxes()[c*c_gpuNumClusterPerCell + s];
            ba[XX] += bb.upper.x - bb.lower.x;
            ba[YY] += bb.upper.y - bb.lower.y;
            ba[ZZ] += bb.upper.z - bb.lower.z;
        }
#endif
        ns += grid.numClustersPerCell()[c];
    }
    dsvmul(1.0/ns, ba, ba);

    const Grid::Dimensions &dims            = grid.dimensions();
    const real              avgClusterSizeZ =
        (dims.atomDensity > 0 ? grid.geometry().numAtomsPerCell/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
            dims.cellSize[XX]/c_gpuNumClusterPerCellX,
            dims.cellSize[YY]/c_gpuNumClusterPerCellY,
            avgClusterSizeZ,
            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*c_gpuNumClusterPerCellX*dims.invCellSize[XX],
            ba[YY]*c_gpuNumClusterPerCellY*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
}

/*! \brief Set non-bonded interaction flags for the current cluster.
 *
 * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
 */
static void sort_cluster_on_flag(int                numAtomsInCluster,
                                 int                atomStart,
                                 int                atomEnd,
                                 const int         *atinfo,
                                 gmx::ArrayRef<int> order,
                                 int               *flags)
{
    constexpr int c_maxNumAtomsInCluster = 8;
    int           sort1[c_maxNumAtomsInCluster];
    int           sort2[c_maxNumAtomsInCluster];

    GMX_ASSERT(numAtomsInCluster <= c_maxNumAtomsInCluster, "Need to increase c_maxNumAtomsInCluster to support larger clusters");

    *flags = 0;

    int subc = 0;
    for (int s = atomStart; s < atomEnd; s += numAtomsInCluster)
    {
        /* Make lists for this (sub-)cell on atoms with and without LJ */
        int      n1       = 0;
        int      n2       = 0;
        gmx_bool haveQ    = FALSE;
        int      a_lj_max = -1;
        for (int a = s; a < std::min(s + numAtomsInCluster, atomEnd); a++)
        {
            haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]);

            if (GET_CGINFO_HAS_VDW(atinfo[order[a]]))
            {
                sort1[n1++] = order[a];
                a_lj_max    = a;
            }
            else
            {
                sort2[n2++] = order[a];
            }
        }

        /* If we don't have atoms with LJ, there's nothing to sort */
        if (n1 > 0)
        {
            *flags |= NBNXN_CI_DO_LJ(subc);

            if (2*n1 <= numAtomsInCluster)
            {
                /* Only sort when strictly necessary. Ordering particles
                 * can lead to less accurate summation due to rounding,
                 * both for LJ and Coulomb interactions.
                 */
                if (2*(a_lj_max - s) >= numAtomsInCluster)
                {
                    for (int i = 0; i < n1; i++)
                    {
                        order[atomStart + i] = sort1[i];
                    }
                    for (int j = 0; j < n2; j++)
                    {
                        order[atomStart + n1 + j] = sort2[j];
                    }
                }

                *flags |= NBNXN_CI_HALF_LJ(subc);
            }
        }
        if (haveQ)
        {
            *flags |= NBNXN_CI_DO_COUL(subc);
        }
        subc++;
    }
}
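
/* Illustrative example: in a 4-atom cluster where only one atom has LJ
 * parameters, NBNXN_CI_DO_LJ and NBNXN_CI_HALF_LJ are set (2*n1 <= 4), so
 * kernels can restrict LJ work to the first half of the cluster;
 * NBNXN_CI_DO_COUL is set whenever any atom in the cluster has charge.
 */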

/*! \brief Fill a pair search cell with atoms.
 *
 * Potentially sorts atoms and sets the interaction flags.
 */
void Grid::fillCell(GridSetData                    *gridSetData,
                    nbnxn_atomdata_t               *nbat,
                    int                             atomStart,
                    int                             atomEnd,
                    const int                      *atinfo,
                    gmx::ArrayRef<const gmx::RVec>  x,
                    BoundingBox gmx_unused         *bb_work_aligned)
{
    const int numAtoms = atomEnd - atomStart;

    const gmx::ArrayRef<int> &cells       = gridSetData->cells;
    const gmx::ArrayRef<int> &atomIndices = gridSetData->atomIndices;

    if (geometry_.isSimple)
    {
        /* Note that non-local grids are already sorted.
         * Then sort_cluster_on_flag will only set the flags and the sorting
         * will not affect the atom order.
         */
        sort_cluster_on_flag(geometry_.numAtomsICluster, atomStart, atomEnd, atinfo, atomIndices,
                             flags_.data() + atomToCluster(atomStart) - cellOffset_);
    }

    if (haveFep_)
    {
        /* Set the fep flag for perturbed atoms in this (sub-)cell */

        /* The grid-local cluster/(sub-)cell index */
        int cell = atomToCluster(atomStart) - cellOffset_*(geometry_.isSimple ? 1 : c_gpuNumClusterPerCell);
        fep_[cell] = 0;
        for (int at = atomStart; at < atomEnd; at++)
        {
            if (atomIndices[at] >= 0 && GET_CGINFO_FEP(atinfo[atomIndices[at]]))
            {
                fep_[cell] |= (1 << (at - atomStart));
            }
        }
    }

    /* Now we have sorted the atoms, set the cell indices */
    for (int at = atomStart; at < atomEnd; at++)
    {
        cells[atomIndices[at]] = at;
    }

    copy_rvec_to_nbat_real(atomIndices.data() + atomStart, numAtoms, geometry_.numAtomsICluster,
                           as_rvec_array(x.data()),
                           nbat->XFormat, nbat->x().data(), atomStart);

    if (nbat->XFormat == nbatX4)
    {
        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
        if (2*geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
        {
            calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
                                          bbj_.data() + offset*2);
        }
        else
#endif
        {
            calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
        }
    }
    else if (nbat->XFormat == nbatX8)
    {
        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

        calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
    }
#if NBNXN_BBXXXX
    else if (!geometry_.isSimple)
    {
        /* Store the bounding boxes in a format convenient
         * for SIMD4 calculations: xxxxyyyyzzzz...
         */
        const int clusterIndex = ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log);
        float    *pbb_ptr      =
            pbb_.data() +
            packedBoundingBoxesIndex(clusterIndex) +
            (clusterIndex & (c_packedBoundingBoxesDimSize - 1));

#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
        if (nbat->XFormat == nbatXYZQ)
        {
            GMX_ASSERT(bb_work_aligned != nullptr, "Must have valid aligned work structure");
            calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
                                         bb_work_aligned, pbb_ptr);
        }
        else
#endif
        {
            calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
                                   pbb_ptr);
        }
        if (gmx_debug_at)
        {
            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    pbb_ptr[0*c_packedBoundingBoxesDimSize], pbb_ptr[3*c_packedBoundingBoxesDimSize],
                    pbb_ptr[1*c_packedBoundingBoxesDimSize], pbb_ptr[4*c_packedBoundingBoxesDimSize],
                    pbb_ptr[2*c_packedBoundingBoxesDimSize], pbb_ptr[5*c_packedBoundingBoxesDimSize]);
        }
    }
#endif
    else
    {
        /* Store the bounding boxes as xyz.xyz. */
        BoundingBox *bb_ptr = bb_.data() + atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);

        calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
                          bb_ptr);

        if (gmx_debug_at)
        {
            int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    bb_[bbo].lower.x, bb_[bbo].upper.x,
                    bb_[bbo].lower.y, bb_[bbo].upper.y,
                    bb_[bbo].lower.z, bb_[bbo].upper.z);
        }
    }
}

/* Spatially sort the atoms within one grid column */
void Grid::sortColumnsCpuGeometry(GridSetData *gridSetData,
                                  int dd_zone,
                                  int atomStart, int atomEnd,
                                  const int *atinfo,
                                  gmx::ArrayRef<const gmx::RVec> x,
                                  nbnxn_atomdata_t *nbat,
                                  int cxy_start, int cxy_end,
                                  gmx::ArrayRef<int> sort_work)
{
    if (debug)
    {
        fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
                cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);
    }

    const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);

    const int  numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Sort the atoms within each x,y column in 3 dimensions */
    for (int cxy = cxy_start; cxy < cxy_end; cxy++)
    {
        const int numAtoms   = numAtomsInColumn(cxy);
        const int numCellsZ  = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
        const int atomOffset = firstAtomInColumn(cxy);

        /* Sort the atoms within each x,y column on z coordinate */
        sort_atoms(ZZ, FALSE, dd_zone,
                   relevantAtomsAreWithinGridBounds,
                   gridSetData->atomIndices.data() + atomOffset, numAtoms, x,
                   dimensions_.lowerCorner[ZZ],
                   1.0/dimensions_.gridSize[ZZ], numCellsZ*numAtomsPerCell,
                   sort_work);

        /* Fill the ncz cells in this column */
        const int firstCell  = firstCellInColumn(cxy);
        int       cellFilled = firstCell;
        for (int cellZ = 0; cellZ < numCellsZ; cellZ++)
        {
            const int cell           = firstCell + cellZ;

            const int atomOffsetCell = atomOffset + cellZ*numAtomsPerCell;
            const int numAtomsCell   = std::min(numAtomsPerCell, numAtoms - (atomOffsetCell - atomOffset));

            fillCell(gridSetData, nbat,
                     atomOffsetCell, atomOffsetCell + numAtomsCell, atinfo, x,
                     nullptr);

            /* This copy to bbcz is not really necessary.
             * But it allows us to use the same grid search code
             * for the simple and supersub cell setups.
             */
            if (numAtomsCell > 0)
            {
                cellFilled = cell;
            }
            bbcz_[cell].lower = bb_[cellFilled].lower.z;
            bbcz_[cell].upper = bb_[cellFilled].upper.z;
        }

        /* Set the unused atom indices to -1 */
        for (int ind = numAtoms; ind < numCellsZ*numAtomsPerCell; ind++)
        {
            gridSetData->atomIndices[atomOffset + ind] = -1;
        }
    }
}
1078 void Grid::sortColumnsGpuGeometry(GridSetData
*gridSetData
,
1080 int atomStart
, int atomEnd
,
1082 gmx::ArrayRef
<const gmx::RVec
> x
,
1083 nbnxn_atomdata_t
*nbat
,
1084 int cxy_start
, int cxy_end
,
1085 gmx::ArrayRef
<int> sort_work
)
1087 BoundingBox bb_work_array
[2];
1088 BoundingBox
*bb_work_aligned
= reinterpret_cast<BoundingBox
*>((reinterpret_cast<std::size_t>(bb_work_array
+ 1)) & (~(static_cast<std::size_t>(15))));
1092 fprintf(debug
, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
1093 cellOffset_
, cxy_start
, cxy_end
, atomStart
, atomEnd
);
1096 const bool relevantAtomsAreWithinGridBounds
= (dimensions_
.maxAtomGroupRadius
== 0);
1098 const int numAtomsPerCell
= geometry_
.numAtomsPerCell
;
1100 const int subdiv_x
= geometry_
.numAtomsICluster
;
1101 const int subdiv_y
= c_gpuNumClusterPerCellX
*subdiv_x
;
1102 const int subdiv_z
= c_gpuNumClusterPerCellY
*subdiv_y
;
1104 /* Extract the atom index array that will be filled here */
1105 const gmx::ArrayRef
<int> &atomIndices
= gridSetData
->atomIndices
;
1107 /* Sort the atoms within each x,y column in 3 dimensions.
1108 * Loop over all columns on the x/y grid.
1110 for (int cxy
= cxy_start
; cxy
< cxy_end
; cxy
++)
1112 const int gridX
= cxy
/dimensions_
.numCells
[YY
];
1113 const int gridY
= cxy
- gridX
*dimensions_
.numCells
[YY
];
1115 const int numAtomsInColumn
= cxy_na_
[cxy
];
1116 const int numCellsInColumn
= cxy_ind_
[cxy
+ 1] - cxy_ind_
[cxy
];
1117 const int atomOffset
= firstAtomInColumn(cxy
);
1119 /* Sort the atoms within each x,y column on z coordinate */
1120 sort_atoms(ZZ
, FALSE
, dd_zone
,
1121 relevantAtomsAreWithinGridBounds
,
1122 atomIndices
.data() + atomOffset
, numAtomsInColumn
, x
,
1123 dimensions_
.lowerCorner
[ZZ
],
1124 1.0/dimensions_
.gridSize
[ZZ
], numCellsInColumn
*numAtomsPerCell
,
1127 /* This loop goes over the cells and clusters along z at once */
1128 for (int sub_z
= 0; sub_z
< numCellsInColumn
*c_gpuNumClusterPerCellZ
; sub_z
++)
1130 const int atomOffsetZ
= atomOffset
+ sub_z
*subdiv_z
;
1131 const int numAtomsZ
= std::min(subdiv_z
, numAtomsInColumn
- (atomOffsetZ
- atomOffset
));
1133 /* We have already sorted on z */
1135 if (sub_z
% c_gpuNumClusterPerCellZ
== 0)
1137 cz
= sub_z
/c_gpuNumClusterPerCellZ
;
1138 const int cell
= cxy_ind_
[cxy
] + cz
;
1140 /* The number of atoms in this cell/super-cluster */
1141 const int numAtoms
= std::min(numAtomsPerCell
, numAtomsInColumn
- (atomOffsetZ
- atomOffset
));
1143 numClusters_
[cell
] = std::min(c_gpuNumClusterPerCell
,
1144 (numAtoms
+ geometry_
.numAtomsICluster
- 1)/ geometry_
.numAtomsICluster
);
1146 /* Store the z-boundaries of the bounding box of the cell */
1147 bbcz_
[cell
].lower
= x
[atomIndices
[atomOffsetZ
]][ZZ
];
1148 bbcz_
[cell
].upper
= x
[atomIndices
[atomOffsetZ
+ numAtoms
- 1]][ZZ
];
1151 if (c_gpuNumClusterPerCellY
> 1)
1153 /* Sort the atoms along y */
1154 sort_atoms(YY
, (sub_z
& 1) != 0, dd_zone
,
1155 relevantAtomsAreWithinGridBounds
,
1156 atomIndices
.data() + atomOffsetZ
, numAtomsZ
, x
,
1157 dimensions_
.lowerCorner
[YY
] + gridY
*dimensions_
.cellSize
[YY
],
1158 dimensions_
.invCellSize
[YY
], subdiv_z
,
1162 for (int sub_y
= 0; sub_y
< c_gpuNumClusterPerCellY
; sub_y
++)
1164 const int atomOffsetY
= atomOffsetZ
+ sub_y
*subdiv_y
;
1165 const int numAtomsY
= std::min(subdiv_y
, numAtomsInColumn
- (atomOffsetY
- atomOffset
));
1167 if (c_gpuNumClusterPerCellX
> 1)
1169 /* Sort the atoms along x */
1170 sort_atoms(XX
, ((cz
*c_gpuNumClusterPerCellY
+ sub_y
) & 1) != 0, dd_zone
,
1171 relevantAtomsAreWithinGridBounds
,
1172 atomIndices
.data() + atomOffsetY
, numAtomsY
, x
,
1173 dimensions_
.lowerCorner
[XX
] + gridX
*dimensions_
.cellSize
[XX
],
1174 dimensions_
.invCellSize
[XX
], subdiv_y
,
1178 for (int sub_x
= 0; sub_x
< c_gpuNumClusterPerCellX
; sub_x
++)
1180 const int atomOffsetX
= atomOffsetY
+ sub_x
*subdiv_x
;
1181 const int numAtomsX
= std::min(subdiv_x
, numAtomsInColumn
- (atomOffsetX
- atomOffset
));
1183 fillCell(gridSetData
, nbat
,
1184 atomOffsetX
, atomOffsetX
+ numAtomsX
, atinfo
, x
,
1190 /* Set the unused atom indices to -1 */
1191 for (int ind
= numAtomsInColumn
; ind
< numCellsInColumn
*numAtomsPerCell
; ind
++)
1193 atomIndices
[atomOffset
+ ind
] = -1;

/*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
static void setCellAndAtomCount(gmx::ArrayRef<int> cell,
                                int                cellIndex,
                                gmx::ArrayRef<int> cxy_na,
                                int                atomIndex)
{
    cell[atomIndex]    = cellIndex;
    cxy_na[cellIndex] += 1;
}
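
/* Illustrative example: the caller below passes the column index
 * cx*gridDims.numCells[YY] + cy as cellIndex, so with numCells = {3, 4}
 * an atom in column cx = 2, cy = 1 is counted in cxy_na[2*4 + 1] = cxy_na[9].
 */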

void Grid::calcColumnIndices(const Grid::Dimensions         &gridDims,
                             const gmx::UpdateGroupsCog     *updateGroupsCog,
                             const int                       atomStart,
                             const int                       atomEnd,
                             gmx::ArrayRef<const gmx::RVec>  x,
                             const int                       dd_zone,
                             const int                      *move,
                             const int                       thread,
                             const int                       nthread,
                             gmx::ArrayRef<int>              cell,
                             gmx::ArrayRef<int>              cxy_na)
{
    const int numColumns = gridDims.numCells[XX]*gridDims.numCells[YY];

    /* We add one extra cell for particles which moved during DD */
    for (int i = 0; i < numColumns + 1; i++)
    {
        cxy_na[i] = 0;
    }

    int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
    int taskAtomEnd   = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;

    if (dd_zone == 0)
    {
        /* Home zone */
        for (int i = taskAtomStart; i < taskAtomEnd; i++)
        {
            if (move == nullptr || move[i] >= 0)
            {
                const gmx::RVec &coord = (updateGroupsCog ? updateGroupsCog->cogForAtom(i) : x[i]);

                /* We need to be careful with rounding,
                 * particles might be a few bits outside the local zone.
                 * The int cast takes care of the lower bound,
                 * we will explicitly take care of the upper bound.
                 */
                int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
                int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

                if (cx < 0 || cx > gridDims.numCells[XX] ||
                    cy < 0 || cy > gridDims.numCells[YY])
                {
                    gmx_fatal(FARGS,
                              "grid cell cx %d cy %d out of range (max %d %d)\n"
                              "atom %f %f %f, grid->c0 %f %f",
                              cx, cy,
                              gridDims.numCells[XX],
                              gridDims.numCells[YY],
                              x[i][XX], x[i][YY], x[i][ZZ],
                              gridDims.lowerCorner[XX],
                              gridDims.lowerCorner[YY]);
                }

                /* Take care of potential rounding issues */
                cx = std::min(cx, gridDims.numCells[XX] - 1);
                cy = std::min(cy, gridDims.numCells[YY] - 1);

                /* For the moment cell contains only the grid-local
                 * x and y indices, not z.
                 */
                setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
            }
            else
            {
                /* Put this moved particle after the end of the grid,
                 * so we can process it later without using conditionals.
                 */
                setCellAndAtomCount(cell, numColumns, cxy_na, i);
            }
        }
    }
    else
    {
        /* Non-home zone */
        for (int i = taskAtomStart; i < taskAtomEnd; i++)
        {
            int cx = static_cast<int>((x[i][XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
            int cy = static_cast<int>((x[i][YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

            /* For non-home zones there could be particles outside
             * the non-bonded cut-off range, which have been communicated
             * for bonded interactions only. For the result it doesn't
             * matter where these end up on the grid. For performance
             * we put them in an extra row at the border.
             */
            cx = std::max(cx, 0);
            cx = std::min(cx, gridDims.numCells[XX] - 1);
            cy = std::max(cy, 0);
            cy = std::min(cy, gridDims.numCells[YY] - 1);

            /* For the moment cell contains only the grid-local
             * x and y indices, not z.
             */
            setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
        }
    }
}
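
/* Illustrative example: with lowerCorner[XX] = 0 and invCellSize[XX] = 2 nm^-1
 * (0.5 nm cells), an atom at x = 2.3 nm gets cx = (int)(2.3*2) = 4; the
 * clamping above only changes cx/cy for atoms that sit slightly outside the
 * zone due to rounding or that were communicated for bonded interactions.
 */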

/*! \brief Resizes grid and atom data which depend on the number of cells */
static void resizeForNumberOfCells(const int         numNbnxnAtoms,
                                   const int         numAtomsMoved,
                                   GridSetData      *gridSetData,
                                   nbnxn_atomdata_t *nbat)
{
    /* Note: gridSetData->cellIndices was already resized before */

    /* To avoid conditionals we store the moved particles at the end of the
     * atom index array; make sure we have enough space.
     */
    gridSetData->atomIndices.resize(numNbnxnAtoms + numAtomsMoved);

    /* Make space in nbat for storing the atom coordinates */
    nbat->resizeCoordinateBuffer(numNbnxnAtoms);
}

void Grid::setCellIndices(int                             ddZone,
                          int                             cellOffset,
                          GridSetData                    *gridSetData,
                          gmx::ArrayRef<GridWork>         gridWork,
                          int                             atomStart,
                          int                             atomEnd,
                          const int                      *atinfo,
                          gmx::ArrayRef<const gmx::RVec>  x,
                          const int                       numAtomsMoved,
                          nbnxn_atomdata_t               *nbat)
{
    cellOffset_   = cellOffset;

    srcAtomBegin_ = atomStart;
    srcAtomEnd_   = atomEnd;

    const int nthread = gmx_omp_nthreads_get(emntPairsearch);

    const int numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Make the cell index as a function of x and y */
    int ncz_max = 0;
    int ncz     = 0;
    cxy_ind_[0] = 0;
    for (int i = 0; i < numColumns() + 1; i++)
    {
        /* We set ncz_max at the beginning of the loop instead of at the end
         * to skip i=grid->ncx*grid->numCells[YY] which are moved particles
         * that do not need to be ordered on the grid.
         */
        if (ncz > ncz_max)
        {
            ncz_max = ncz;
        }
        int cxy_na_i = gridWork[0].numAtomsPerColumn[i];
        for (int thread = 1; thread < nthread; thread++)
        {
            cxy_na_i += gridWork[thread].numAtomsPerColumn[i];
        }
        ncz = (cxy_na_i + numAtomsPerCell - 1)/numAtomsPerCell;
        if (nbat->XFormat == nbatX8)
        {
            /* Make the number of cells a multiple of 2 */
            ncz = (ncz + 1) & ~1;
        }
        cxy_ind_[i+1] = cxy_ind_[i] + ncz;
        /* Clear cxy_na_, so we can reuse the array below */
        cxy_na_[i] = 0;
    }
    numCellsTotal_     = cxy_ind_[numColumns()] - cxy_ind_[0];
    numCellsColumnMax_ = ncz_max;

    /* Resize grid and atom data which depend on the number of cells */
    resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, gridSetData, nbat);

    if (debug)
    {
        fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
                numAtomsPerCell, geometry_.numAtomsICluster, numCellsTotal_,
                dimensions_.numCells[XX], dimensions_.numCells[YY],
                numCellsTotal_/(static_cast<double>(numColumns())),
                ncz_max);
        if (gmx_debug_at)
        {
            int i = 0;
            for (int cy = 0; cy < dimensions_.numCells[YY]; cy++)
            {
                for (int cx = 0; cx < dimensions_.numCells[XX]; cx++)
                {
                    fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]);
                    i++;
                }
                fprintf(debug, "\n");
            }
        }
    }

    /* Make sure the work array for sorting is large enough */
    const int worstCaseSortBufferSize = ncz_max*numAtomsPerCell*c_sortGridMaxSizeFactor;
    if (worstCaseSortBufferSize > gmx::index(gridWork[0].sortBuffer.size()))
    {
        for (GridWork &work : gridWork)
        {
            /* Elements not in use should be -1 */
            work.sortBuffer.resize(worstCaseSortBufferSize, -1);
        }
    }

    /* Now we know the dimensions we can fill the grid.
     * This is the first, unsorted fill. We sort the columns after this.
     */
    gmx::ArrayRef<int> cells       = gridSetData->cells;
    gmx::ArrayRef<int> atomIndices = gridSetData->atomIndices;
    for (int i = atomStart; i < atomEnd; i++)
    {
        /* At this point nbs->cell contains the local grid x,y indices */
        const int cxy = cells[i];
        atomIndices[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i;
    }

    if (ddZone == 0)
    {
        /* Set the cell indices for the moved particles */
        int n0 = numCellsTotal_*numAtomsPerCell;
        int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()];
        for (int i = n0; i < n1; i++)
        {
            cells[atomIndices[i]] = i;
        }
    }

    /* Sort the super-cell columns along z into the sub-cells. */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            int columnStart = ((thread + 0)*numColumns())/nthread;
            int columnEnd   = ((thread + 1)*numColumns())/nthread;
            if (geometry_.isSimple)
            {
                sortColumnsCpuGeometry(gridSetData, ddZone,
                                       atomStart, atomEnd, atinfo, x, nbat,
                                       columnStart, columnEnd,
                                       gridWork[thread].sortBuffer);
            }
            else
            {
                sortColumnsGpuGeometry(gridSetData, ddZone,
                                       atomStart, atomEnd, atinfo, x, nbat,
                                       columnStart, columnEnd,
                                       gridWork[thread].sortBuffer);
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    if (geometry_.isSimple && nbat->XFormat == nbatX8)
    {
        combine_bounding_box_pairs(*this, bb_, bbj_);
    }

    if (!geometry_.isSimple)
    {
        numClustersTotal_ = 0;
        for (int i = 0; i < numCellsTotal_; i++)
        {
            numClustersTotal_ += numClusters_[i];
        }
    }

    if (debug)
    {
        if (geometry_.isSimple)
        {
            print_bbsizes_simple(debug, *this);
        }
        else
        {
            fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",
                    numClustersTotal_,
                    (atomEnd - atomStart)/static_cast<double>(numClustersTotal_));

            print_bbsizes_supersub(debug, *this);
        }
    }
}

} // namespace Nbnxm