From 6ca52dcf0bf0f39d03fc21531bd88e12b50ac4e6 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Wed, 9 Aug 2017 13:54:27 +0200 Subject: [PATCH] Clean up nbnxn cluster pair addition code Partial clean up of the nbnxn CPU makeClusterList functions. Clarified variable names and added more documentation. No functional changes, except for the removal of setting the start of the column range to the diagonal element, which led to more checks but did not affect the resulting cell range. This is preparation for a bug fix. Change-Id: Ib0a32087d205a23ebef85394d226f084ba515c24 --- src/gromacs/mdlib/nbnxn_search.cpp | 299 ++++++++++++++++------------- src/gromacs/mdlib/nbnxn_search_simd_2xnn.h | 102 +++++----- src/gromacs/mdlib/nbnxn_search_simd_4xn.h | 101 +++++----- 3 files changed, 275 insertions(+), 227 deletions(-) diff --git a/src/gromacs/mdlib/nbnxn_search.cpp b/src/gromacs/mdlib/nbnxn_search.cpp index 739a19432d..56eb5e1150 100644 --- a/src/gromacs/mdlib/nbnxn_search.cpp +++ b/src/gromacs/mdlib/nbnxn_search.cpp @@ -78,7 +78,7 @@ using namespace gmx; // TODO: Remove when this file is moved into gmx namespace * This leads to more conditionals than shifting forward. * We do this to get more balanced pair lists. 
*/ -static const bool pbc_shift_backward = true; +constexpr bool c_pbcShiftBackward = true; static void nbs_cycle_clear(nbnxn_cycle_t *cc) @@ -566,12 +566,12 @@ static void subc_bb_dist2_simd4_xxxx(const float *bb_j, #endif /* NBNXN_SEARCH_BB_SIMD4 */ -/* Returns if any atom pair from two clusters is within distance sqrt(rl2) */ +/* Returns if any atom pair from two clusters is within distance sqrt(rlist2) */ static gmx_inline gmx_bool clusterpair_in_range(const nbnxn_list_work_t *work, int si, int csj, int stride, const real *x_j, - real rl2) + real rlist2) { #if !GMX_SIMD4_HAVE_REAL @@ -590,7 +590,7 @@ clusterpair_in_range(const nbnxn_list_work_t *work, real d2 = gmx::square(x_i[i0 ] - x_j[j0 ]) + gmx::square(x_i[i0+1] - x_j[j0+1]) + gmx::square(x_i[i0+2] - x_j[j0+2]); - if (d2 < rl2) + if (d2 < rlist2) { return TRUE; } @@ -608,7 +608,7 @@ clusterpair_in_range(const nbnxn_list_work_t *work, */ assert(c_nbnxnGpuClusterSize == 8); - Simd4Real rc2_S = Simd4Real(rl2); + Simd4Real rc2_S = Simd4Real(rlist2); const real *x_i = work->x_ci_simd; @@ -722,13 +722,13 @@ static void check_excl_space(nbnxn_pairlist_t *nbl, int extra) } } -/* Ensures there is enough space for ncell extra j-cells in the list */ +/* Ensures there is enough space for maxNumExtraClusters extra j-clusters in the list */ static void check_cell_list_space_simple(nbnxn_pairlist_t *nbl, - int ncell) + int maxNumExtraClusters) { int cj_max; - cj_max = nbl->ncj + ncell; + cj_max = nbl->ncj + maxNumExtraClusters; if (cj_max > nbl->cj_nalloc) { @@ -1166,32 +1166,41 @@ static unsigned int get_imask_simd_j8(gmx_bool rdiag, int ci, int cj) #endif #endif -/* Plain C code for making a pair list of cell ci vs cell cjf-cjl. - * Checks bounding box distances and possibly atom pair distances. +/* Plain C code for checking and adding cluster-pairs to the list. 
+ * + * \param[in] gridj The j-grid + * \param[in,out] nbl The pair-list to store the cluster pairs in + * \param[in] icluster The index of the i-cluster + * \param[in] jclusterFirst The first cluster in the j-range + * \param[in] jclusterLast The last cluster in the j-range + * \param[in] excludeSubDiagonal Exclude atom pairs with i-index > j-index + * \param[in] x_j Coordinates for the j-atom, in xyz format + * \param[in] rlist2 The squared list cut-off + * \param[in] rbb2 The squared cut-off for putting cluster-pairs in the list based on bounding box distance only + * \param[in,out] numDistanceChecks The number of distance checks performed */ -static void make_cluster_list_simple(const nbnxn_grid_t *gridj, - nbnxn_pairlist_t *nbl, - int ci, int cjf, int cjl, - gmx_bool remove_sub_diag, - const real *x_j, - real rl2, float rbb2, - int *ndistc) +static void +makeClusterListSimple(const nbnxn_grid_t * gridj, + nbnxn_pairlist_t * nbl, + int icluster, + int jclusterFirst, + int jclusterLast, + bool excludeSubDiagonal, + const real * gmx_restrict x_j, + real rlist2, + float rbb2, + int * gmx_restrict numDistanceChecks) { - const nbnxn_bb_t *bb_ci; - const real *x_ci; - - gmx_bool InRange; - real d2; - int cjf_gl, cjl_gl; + const nbnxn_bb_t * gmx_restrict bb_ci = nbl->work->bb_ci; + const real * gmx_restrict x_ci = nbl->work->x_ci; - bb_ci = nbl->work->bb_ci; - x_ci = nbl->work->x_ci; + gmx_bool InRange; InRange = FALSE; - while (!InRange && cjf <= cjl) + while (!InRange && jclusterFirst <= jclusterLast) { - d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bb); - *ndistc += 2; + real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, gridj->bb); + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -1202,9 +1211,9 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - cjf_gl = gridj->cell0 + cjf; + int cjf_gl = gridj->cell0 + 
jclusterFirst; for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) @@ -1212,14 +1221,14 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, InRange = InRange || (gmx::square(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + gmx::square(x_ci[i*STRIDE_XYZ+YY] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+YY]) + - gmx::square(x_ci[i*STRIDE_XYZ+ZZ] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rl2); + gmx::square(x_ci[i*STRIDE_XYZ+ZZ] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rlist2); } } - *ndistc += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; + *numDistanceChecks += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; } if (!InRange) { - cjf++; + jclusterFirst++; } } if (!InRange) @@ -1228,10 +1237,10 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, } InRange = FALSE; - while (!InRange && cjl > cjf) + while (!InRange && jclusterLast > jclusterFirst) { - d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bb); - *ndistc += 2; + real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, gridj->bb); + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -1242,9 +1251,9 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - cjl_gl = gridj->cell0 + cjl; + int cjl_gl = gridj->cell0 + jclusterLast; for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) @@ -1252,24 +1261,24 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, InRange = InRange || (gmx::square(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + gmx::square(x_ci[i*STRIDE_XYZ+YY] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+YY]) + - gmx::square(x_ci[i*STRIDE_XYZ+ZZ] - 
x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rl2); + gmx::square(x_ci[i*STRIDE_XYZ+ZZ] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rlist2); } } - *ndistc += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; + *numDistanceChecks += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; } if (!InRange) { - cjl--; + jclusterLast--; } } - if (cjf <= cjl) + if (jclusterFirst <= jclusterLast) { - for (int cj = cjf; cj <= cjl; cj++) + for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++) { /* Store cj and the interaction mask */ - nbl->cj[nbl->ncj].cj = gridj->cell0 + cj; - nbl->cj[nbl->ncj].excl = get_imask(remove_sub_diag, ci, cj); + nbl->cj[nbl->ncj].cj = gridj->cell0 + jcluster; + nbl->cj[nbl->ncj].excl = get_imask(excludeSubDiagonal, icluster, jcluster); nbl->ncj++; } /* Increase the closing index in i super-cell list */ @@ -1293,8 +1302,8 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, int sci, int scj, gmx_bool sci_equals_scj, int stride, const real *x, - real rl2, float rbb2, - int *ndistc) + real rlist2, float rbb2, + int *numDistanceChecks) { nbnxn_list_work_t *work = nbl->work; @@ -1348,7 +1357,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, /* Determine all ci1 bb distances in one call with SIMD4 */ subc_bb_dist2_simd4_xxxx(gridj->pbb+(cj>>STRIDE_PBB_2LOG)*NNBSBB_XXXX+(cj & (STRIDE_PBB-1)), ci1, pbb_ci, d2l); - *ndistc += c_nbnxnGpuClusterSize*2; + *numDistanceChecks += c_nbnxnGpuClusterSize*2; #endif int npair = 0; @@ -1363,8 +1372,8 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, #if !NBNXN_BBXXXX /* Determine the bb distance between ci and cj */ - d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, gridj->bb); - *ndistc += 2; + d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, gridj->bb); + *numDistanceChecks += 2; #endif float d2 = d2l[ci]; @@ -1374,15 +1383,15 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, * or within the cut-off and there is at 
least one atom pair * within the cut-off. This check is very costly. */ - *ndistc += c_nbnxnGpuClusterSize*c_nbnxnGpuClusterSize; + *numDistanceChecks += c_nbnxnGpuClusterSize*c_nbnxnGpuClusterSize; if (d2 < rbb2 || - (d2 < rl2 && - clusterpair_in_range(work, ci, cj_gl, stride, x, rl2))) + (d2 < rlist2 && + clusterpair_in_range(work, ci, cj_gl, stride, x, rlist2))) #else /* Check if the distance between the two bounding boxes * in within the pair-list cut-off. */ - if (d2 < rl2) + if (d2 < rlist2) #endif { /* Flag this i-subcell to be taken into account */ @@ -1401,7 +1410,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, * within the cut-off, so we could get rid of it. */ if (npair == 1 && d2l[ci_last] >= rbb2 && - !clusterpair_in_range(work, ci_last, cj_gl, stride, x, rl2)) + !clusterpair_in_range(work, ci_last, cj_gl, stride, x, rlist2)) { imask &= ~(1U << (cj_offset*c_gpuNumClusterPerCell + ci_last)); npair--; @@ -3166,7 +3175,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, { int na_cj_2log; matrix box; - real rl2, rl_fep2 = 0; + real rlist2, rl_fep2 = 0; float rbb2; int ci_b, ci, ci_x, ci_y, ci_xy, cj; ivec shp; @@ -3183,9 +3192,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, real bz1_frac; real d2cx, d2z, d2z_cx, d2z_cy, d2zx, d2zxy, d2xy; int cxf, cxl, cyf, cyf_x, cyl; - int c0, c1, cs, cf, cl; - int ndistc; - int ncpcheck; + int numDistanceChecks; int gridi_flag_shift = 0, gridj_flag_shift = 0; gmx_bitmask_t *gridj_flag = nullptr; int ncj_old_i, ncj_old_j; @@ -3216,7 +3223,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, copy_mat(nbs->box, box); - rl2 = nbl->rlist*nbl->rlist; + rlist2 = nbl->rlist*nbl->rlist; if (nbs->bFEP && !nbl->bSimple) { @@ -3255,7 +3262,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, else { if (d == XX && - box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < std::sqrt(rl2)) + box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < 
std::sqrt(rlist2)) { shp[d] = 2; } @@ -3310,8 +3317,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, gridi->nc, gridi->nc/(double)(gridi->ncx*gridi->ncy), ci_block); } - ndistc = 0; - ncpcheck = 0; + numDistanceChecks = 0; /* Initially ci_b and ci to 1 before where we want them to start, * as they will both be incremented in next_ci. @@ -3344,7 +3350,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, { d2cx = gmx::square(gridj->c0[XX] - bx1); - if (d2cx >= rl2) + if (d2cx >= rlist2) { continue; } @@ -3376,7 +3382,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, d2z_cx = d2z + d2cx; - if (d2z_cx >= rl2) + if (d2z_cx >= rlist2) { continue; } @@ -3405,7 +3411,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, get_cell_range(by0, by1, gridj->ncy, gridj->c0[YY], gridj->sy, gridj->inv_sy, - d2z_cx, rl2, + d2z_cx, rlist2, &cyf, &cyl); if (cyf > cyl) @@ -3427,7 +3433,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, { shift = XYZ2IS(tx, ty, tz); - if (pbc_shift_backward && gridi == gridj && shift > CENTRAL) + if (c_pbcShiftBackward && gridi == gridj && shift > CENTRAL) { continue; } @@ -3447,7 +3453,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, get_cell_range(bx0, bx1, gridj->ncx, gridj->c0[XX], gridj->sx, gridj->inv_sx, - d2z_cy, rl2, + d2z_cy, rlist2, &cxf, &cxl); if (cxf > cxl) @@ -3464,7 +3470,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, new_sci_entry(nbl, cell0_i+ci, shift); } - if ((!pbc_shift_backward || (shift == CENTRAL && + if ((!c_pbcShiftBackward || (shift == CENTRAL && gridi == gridj)) && cxf < ci_x) { @@ -3508,7 +3514,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (gridi == gridj && cx == 0 && - (!pbc_shift_backward || shift == CENTRAL) && + (!c_pbcShiftBackward || shift == CENTRAL) && cyf < ci_y) { /* Leave the pairs with i > j. 
@@ -3523,15 +3529,8 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, for (int cy = cyf_x; cy <= cyl; cy++) { - c0 = gridj->cxy_ind[cx*gridj->ncy+cy]; - c1 = gridj->cxy_ind[cx*gridj->ncy+cy+1]; - - if (pbc_shift_backward && - gridi == gridj && - shift == CENTRAL && c0 < ci) - { - c0 = ci; - } + const int columnStart = gridj->cxy_ind[cx*gridj->ncy + cy]; + const int columnEnd = gridj->cxy_ind[cx*gridj->ncy + cy + 1]; d2zxy = d2zx; if (gridj->c0[YY] + cy*gridj->sy > by1) @@ -3542,56 +3541,76 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, { d2zxy += gmx::square(gridj->c0[YY] + (cy+1)*gridj->sy - by0); } - if (c1 > c0 && d2zxy < rl2) + if (columnStart < columnEnd && d2zxy < rlist2) { - cs = c0 + static_cast<int>(bz1_frac*(c1 - c0)); - if (cs >= c1) + /* To improve efficiency in the common case + * of a homogeneous particle distribution, + * we estimate the index of the middle cell + * in range (midCell). We search down and up + * starting from this index. + * + * Note that the bbcz_j array contains bounds + * for i-clusters, thus for clusters of 4 atoms. + * For the common case where the j-cluster size + * is 8, we could step with a stride of 2, + * but we do not do this because it would + * complicate this code even more. + */ + int midCell = columnStart + static_cast<int>(bz1_frac*(columnEnd - columnStart)); + if (midCell >= columnEnd) { - cs = c1 - 1; + midCell = columnEnd - 1; } d2xy = d2zxy - d2z; /* Find the lowest cell that can possibly * be within range. + * Check if we hit the bottom of the grid, + * if the j-cell is below the i-cell and if so, + * if it is within range. 
*/ - cf = cs; - while (cf > c0 && - (bbcz_j[cf*NNBSBB_D+1] >= bz0 || - d2xy + gmx::square(bbcz_j[cf*NNBSBB_D+1] - bz0) < rl2)) + int firstCell = midCell; + while (firstCell > columnStart && + (bbcz_j[firstCell*NNBSBB_D + 1] >= bz0 || + d2xy + gmx::square(bbcz_j[firstCell*NNBSBB_D + 1] - bz0) < rlist2)) { - cf--; + firstCell--; } /* Find the highest cell that can possibly * be within range. + * Check if we hit the top of the grid, + * if the j-cell is above the i-cell and if so, + * if it is within range. */ - cl = cs; - while (cl < c1-1 && - (bbcz_j[cl*NNBSBB_D] <= bz1 || - d2xy + gmx::square(bbcz_j[cl*NNBSBB_D] - bz1) < rl2)) + int lastCell = midCell; + while (lastCell < columnEnd - 1 && + (bbcz_j[lastCell*NNBSBB_D] <= bz1 || + d2xy + gmx::square(bbcz_j[lastCell*NNBSBB_D] - bz1) < rlist2)) { - cl++; + lastCell++; } -#ifdef NBNXN_REFCODE +#define NBNXN_REFCODE 0 +#if NBNXN_REFCODE { /* Simple reference code, for debugging, * overrides the more complex code above. */ - cf = c1; - cl = -1; - for (int k = c0; k < c1; k++) + firstCell = columnEnd; + lastCell = -1; + for (int k = columnStart; k < columnEnd; k++) { - if (box_dist2(bx0, bx1, by0, by1, bz0, bz1, bb+k) < rl2 && - k < cf) + if (d2xy + gmx::square(bbcz_j[k*NNBSBB_D + 1] - bz0) < rlist2 && + k < firstCell) { - cf = k; + firstCell = k; } - if (box_dist2(bx0, bx1, by0, by1, bz0, bz1, bb+k) < rl2 && - k > cl) + if (d2xy + gmx::square(bbcz_j[k*NNBSBB_D] - bz1) < rlist2 && + k > lastCell) { - cl = k; + lastCell = k; } } } @@ -3602,66 +3621,74 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, /* We want each atom/cell pair only once, * only use cj >= ci. 
*/ - if (!pbc_shift_backward || shift == CENTRAL) + if (!c_pbcShiftBackward || shift == CENTRAL) { - cf = std::max(cf, ci); + firstCell = std::max(firstCell, ci); } } - if (cf <= cl) + if (firstCell <= lastCell) { + GMX_ASSERT(firstCell >= columnStart && lastCell < columnEnd, "The range should reside within the current grid column"); + /* For f buffer flags with simple lists */ ncj_old_j = nbl->ncj; + if (nbl->bSimple) + { + /* We have a maximum of 2 j-clusters + * per i-cluster sized cell. + */ + check_cell_list_space_simple(nbl, 2*(lastCell - firstCell + 1)); + } + else + { + check_cell_list_space_supersub(nbl, lastCell - firstCell + 1); + } + switch (nb_kernel_type) { case nbnxnk4x4_PlainC: - check_cell_list_space_simple(nbl, cl-cf+1); - - make_cluster_list_simple(gridj, - nbl, ci, cf, cl, - (gridi == gridj && shift == CENTRAL), - nbat->x, - rl2, rbb2, - &ndistc); + makeClusterListSimple(gridj, + nbl, ci, firstCell, lastCell, + (gridi == gridj && shift == CENTRAL), + nbat->x, + rlist2, rbb2, + &numDistanceChecks); break; #ifdef GMX_NBNXN_SIMD_4XN case nbnxnk4xN_SIMD_4xN: - check_cell_list_space_simple(nbl, ci_to_cj_simd_4xn(cl - cf) + 2); - make_cluster_list_simd_4xn(gridj, - nbl, ci, cf, cl, - (gridi == gridj && shift == CENTRAL), - nbat->x, - rl2, rbb2, - &ndistc); + makeClusterListSimd4xn(gridj, + nbl, ci, firstCell, lastCell, + (gridi == gridj && shift == CENTRAL), + nbat->x, + rlist2, rbb2, + &numDistanceChecks); break; #endif #ifdef GMX_NBNXN_SIMD_2XNN case nbnxnk4xN_SIMD_2xNN: - check_cell_list_space_simple(nbl, ci_to_cj_simd_2xnn(cl - cf) + 2); - make_cluster_list_simd_2xnn(gridj, - nbl, ci, cf, cl, - (gridi == gridj && shift == CENTRAL), - nbat->x, - rl2, rbb2, - &ndistc); + makeClusterListSimd2xnn(gridj, + nbl, ci, firstCell, lastCell, + (gridi == gridj && shift == CENTRAL), + nbat->x, + rlist2, rbb2, + &numDistanceChecks); break; #endif case nbnxnk8x8x8_PlainC: case nbnxnk8x8x8_GPU: - check_cell_list_space_supersub(nbl, cl-cf+1); - for (cj = cf; cj <= 
cl; cj++) + for (cj = firstCell; cj <= lastCell; cj++) { make_cluster_list_supersub(gridi, gridj, nbl, ci, cj, (gridi == gridj && shift == CENTRAL && ci == cj), nbat->xstride, nbat->x, - rl2, rbb2, - &ndistc); + rlist2, rbb2, + &numDistanceChecks); } break; } - ncpcheck += cl - cf + 1; if (bFBufferFlag && nbl->ncj > ncj_old_j) { @@ -3740,7 +3767,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, } } - work->ndistc = ndistc; + work->ndistc = numDistanceChecks; nbs_cycle_stop(&work->cc[enbsCCsearch]); @@ -3748,9 +3775,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (debug) { - fprintf(debug, "number of distance checks %d\n", ndistc); - fprintf(debug, "ncpcheck %s %d\n", gridi == gridj ? "local" : "non-local", - ncpcheck); + fprintf(debug, "number of distance checks %d\n", numDistanceChecks); if (nbl->bSimple) { diff --git a/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h b/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h index 44b7aa8702..b9045ad702 100644 --- a/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h +++ b/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -61,22 +61,36 @@ icell_set_x_simd_2xnn(int ci, store(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH, load1DualHsimd(x + ia + 2*STRIDE_S + 2) + SimdReal(shz) ); } -/* SIMD code for making a pair list of cell ci vs cell cjf-cjl - * for coordinates in packed format. +/* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format. + * * Checks bouding box distances and possibly atom pair distances. 
* This is an accelerated version of make_cluster_list_simple. + * + * \param[in] gridj The j-grid + * \param[in,out] nbl The pair-list to store the cluster pairs in + * \param[in] icluster The index of the i-cluster + * \param[in] firstCell The first cluster in the j-range, using i-cluster size indexing + * \param[in] lastCell The last cluster in the j-range, using i-cluster size indexing + * \param[in] excludeSubDiagonal Exclude atom pairs with i-index > j-index + * \param[in] x_j Coordinates for the j-atom, in SIMD packed format + * \param[in] rlist2 The squared list cut-off + * \param[in] rbb2 The squared cut-off for putting cluster-pairs in the list based on bounding box distance only + * \param[in,out] numDistanceChecks The number of distance checks performed */ static gmx_inline void -make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, - nbnxn_pairlist_t *nbl, - int ci, int cjf, int cjl, - gmx_bool remove_sub_diag, - const real *x_j, - real rl2, float rbb2, - int *ndistc) +makeClusterListSimd2xnn(const nbnxn_grid_t * gridj, + nbnxn_pairlist_t * nbl, + int icluster, + int firstCell, + int lastCell, + bool excludeSubDiagonal, + const real * gmx_restrict x_j, + real rlist2, + float rbb2, + int * gmx_restrict numDistanceChecks) { - const real *x_ci_simd; - const nbnxn_bb_t *bb_ci; + const real * gmx_restrict x_ci_simd = nbl->work->x_ci_simd; + const nbnxn_bb_t * gmx_restrict bb_ci = nbl->work->bb_ci; SimdReal jx_S, jy_S, jz_S; @@ -94,26 +108,22 @@ make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, gmx_bool InRange; float d2; - int xind_f, xind_l, cj; - - cjf = ci_to_cj_simd_2xnn(cjf); - cjl = ci_to_cj_simd_2xnn(cjl + 1) - 1; - - x_ci_simd = nbl->work->x_ci_simd; + int xind_f, xind_l; - bb_ci = nbl->work->bb_ci; + int jclusterFirst = ci_to_cj_simd_2xnn(firstCell); + int jclusterLast = ci_to_cj_simd_2xnn(lastCell + 1) - 1; - rc2_S = SimdReal(rl2); + rc2_S = SimdReal(rlist2); InRange = FALSE; - while (!InRange && cjf <= cjl) + while (!InRange && jclusterFirst 
<= jclusterLast) { #ifdef NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, gridj->bbj); #else - d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, gridj->bbj); #endif - *ndistc += 2; + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -124,13 +134,13 @@ make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - xind_f = x_ind_cj_simd_2xnn(ci_to_cj_simd_2xnn(gridj->cell0) + cjf); + xind_f = x_ind_cj_simd_2xnn(ci_to_cj_simd_2xnn(gridj->cell0) + jclusterFirst); - jx_S = loadDuplicateHsimd(x_j+xind_f+0*STRIDE_S); - jy_S = loadDuplicateHsimd(x_j+xind_f+1*STRIDE_S); - jz_S = loadDuplicateHsimd(x_j+xind_f+2*STRIDE_S); + jx_S = loadDuplicateHsimd(x_j + xind_f + 0*STRIDE_S); + jy_S = loadDuplicateHsimd(x_j + xind_f + 1*STRIDE_S); + jz_S = loadDuplicateHsimd(x_j + xind_f + 2*STRIDE_S); /* Calculate distance */ dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S; @@ -151,11 +161,11 @@ make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, InRange = anyTrue(wco_any_S); - *ndistc += 2*GMX_SIMD_REAL_WIDTH; + *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH; } if (!InRange) { - cjf++; + jclusterFirst++; } } if (!InRange) @@ -164,14 +174,14 @@ make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, } InRange = FALSE; - while (!InRange && cjl > cjf) + while (!InRange && jclusterLast > jclusterFirst) { #ifdef NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, gridj->bbj); #else - d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterLast, gridj->bbj); #endif - *ndistc += 2; + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -182,13 +192,13 @@ 
make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - xind_l = x_ind_cj_simd_2xnn(ci_to_cj_simd_2xnn(gridj->cell0) + cjl); + xind_l = x_ind_cj_simd_2xnn(ci_to_cj_simd_2xnn(gridj->cell0) + jclusterLast); - jx_S = loadDuplicateHsimd(x_j+xind_l+0*STRIDE_S); - jy_S = loadDuplicateHsimd(x_j+xind_l+1*STRIDE_S); - jz_S = loadDuplicateHsimd(x_j+xind_l+2*STRIDE_S); + jx_S = loadDuplicateHsimd(x_j + xind_l + 0*STRIDE_S); + jy_S = loadDuplicateHsimd(x_j + xind_l + 1*STRIDE_S); + jz_S = loadDuplicateHsimd(x_j + xind_l + 2*STRIDE_S); /* Calculate distance */ dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S; @@ -209,21 +219,21 @@ make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj, InRange = anyTrue(wco_any_S); - *ndistc += 2*GMX_SIMD_REAL_WIDTH; + *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH; } if (!InRange) { - cjl--; + jclusterLast--; } } - if (cjf <= cjl) + if (jclusterFirst <= jclusterLast) { - for (cj = cjf; cj <= cjl; cj++) + for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++) { /* Store cj and the interaction mask */ - nbl->cj[nbl->ncj].cj = ci_to_cj_simd_2xnn(gridj->cell0) + cj; - nbl->cj[nbl->ncj].excl = get_imask_simd_2xnn(remove_sub_diag, ci, cj); + nbl->cj[nbl->ncj].cj = ci_to_cj_simd_2xnn(gridj->cell0) + jcluster; + nbl->cj[nbl->ncj].excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster); nbl->ncj++; } /* Increase the closing index in i super-cell list */ diff --git a/src/gromacs/mdlib/nbnxn_search_simd_4xn.h b/src/gromacs/mdlib/nbnxn_search_simd_4xn.h index 6add781f7b..f69fdc3da0 100644 --- a/src/gromacs/mdlib/nbnxn_search_simd_4xn.h +++ b/src/gromacs/mdlib/nbnxn_search_simd_4xn.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -67,22 +67,36 @@ icell_set_x_simd_4xn(int ci, store(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*STRIDE_S + 3] + shz) ); } -/* SIMD code for making a pair list of cell ci vs cell cjf-cjl - * for coordinates in packed format. +/* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format. + * * Checks bouding box distances and possibly atom pair distances. * This is an accelerated version of make_cluster_list_simple. + * + * \param[in] gridj The j-grid + * \param[in,out] nbl The pair-list to store the cluster pairs in + * \param[in] icluster The index of the i-cluster + * \param[in] firstCell The first cluster in the j-range, using i-cluster size indexing + * \param[in] lastCell The last cluster in the j-range, using i-cluster size indexing + * \param[in] excludeSubDiagonal Exclude atom pairs with i-index > j-index + * \param[in] x_j Coordinates for the j-atom, in SIMD packed format + * \param[in] rlist2 The squared list cut-off + * \param[in] rbb2 The squared cut-off for putting cluster-pairs in the list based on bounding box distance only + * \param[in,out] numDistanceChecks The number of distance checks performed */ static gmx_inline void -make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, - nbnxn_pairlist_t *nbl, - int ci, int cjf, int cjl, - gmx_bool remove_sub_diag, - const real *x_j, - real rl2, float rbb2, - int *ndistc) +makeClusterListSimd4xn(const nbnxn_grid_t * gridj, + nbnxn_pairlist_t * nbl, + int icluster, + int firstCell, + int lastCell, + bool excludeSubDiagonal, + const real * gmx_restrict x_j, + real rlist2, + float rbb2, + int * 
gmx_restrict numDistanceChecks) { - const real *x_ci_simd = nbl->work->x_ci_simd; - const nbnxn_bb_t *bb_ci; + const real * gmx_restrict x_ci_simd = nbl->work->x_ci_simd; + const nbnxn_bb_t * gmx_restrict bb_ci = nbl->work->bb_ci; SimdReal jx_S, jy_S, jz_S; @@ -106,25 +120,24 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, gmx_bool InRange; float d2; - int xind_f, xind_l, cj; + int xind_f, xind_l; + /* Convert the j-range from i-cluster size indexing to j-cluster indexing */ /* cppcheck-suppress selfAssignment . selfAssignment for width 4.*/ - cjf = ci_to_cj_simd_4xn(cjf); - cjl = ci_to_cj_simd_4xn(cjl + 1) - 1; - - bb_ci = nbl->work->bb_ci; + int jclusterFirst = ci_to_cj_simd_4xn(firstCell); + int jclusterLast = ci_to_cj_simd_4xn(lastCell + 1) - 1; - rc2_S = SimdReal(rl2); + rc2_S = SimdReal(rlist2); InRange = FALSE; - while (!InRange && cjf <= cjl) + while (!InRange && jclusterFirst <= jclusterLast) { #ifdef NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, gridj->bbj); #else - d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, gridj->bbj); #endif - *ndistc += 2; + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -135,13 +148,13 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - xind_f = x_ind_cj_simd_4xn(ci_to_cj_simd_4xn(gridj->cell0) + cjf); + xind_f = x_ind_cj_simd_4xn(ci_to_cj_simd_4xn(gridj->cell0) + jclusterFirst); - jx_S = load(x_j+xind_f+0*STRIDE_S); - jy_S = load(x_j+xind_f+1*STRIDE_S); - jz_S = load(x_j+xind_f+2*STRIDE_S); + jx_S = load(x_j + xind_f + 0*STRIDE_S); + jy_S = load(x_j + xind_f + 1*STRIDE_S); + jz_S = load(x_j + xind_f + 2*STRIDE_S); /* Calculate distance */ @@ -175,11 +188,11 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, InRange = anyTrue(wco_any_S); 
- *ndistc += 4*GMX_SIMD_REAL_WIDTH; + *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH; } if (!InRange) { - cjf++; + jclusterFirst++; } } if (!InRange) @@ -188,14 +201,14 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, } InRange = FALSE; - while (!InRange && cjl > cjf) + while (!InRange && jclusterLast > jclusterFirst) { #ifdef NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, gridj->bbj); #else - d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterLast, gridj->bbj); #endif - *ndistc += 2; + *numDistanceChecks += 2; /* Check if the distance is within the distance where * we use only the bounding box distance rbb, @@ -206,13 +219,13 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, { InRange = TRUE; } - else if (d2 < rl2) + else if (d2 < rlist2) { - xind_l = x_ind_cj_simd_4xn(ci_to_cj_simd_4xn(gridj->cell0) + cjl); + xind_l = x_ind_cj_simd_4xn(ci_to_cj_simd_4xn(gridj->cell0) + jclusterLast); - jx_S = load(x_j+xind_l+0*STRIDE_S); - jy_S = load(x_j+xind_l+1*STRIDE_S); - jz_S = load(x_j+xind_l+2*STRIDE_S); + jx_S = load(x_j +xind_l + 0*STRIDE_S); + jy_S = load(x_j +xind_l + 1*STRIDE_S); + jz_S = load(x_j +xind_l + 2*STRIDE_S); /* Calculate distance */ dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S; @@ -245,21 +258,21 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj, InRange = anyTrue(wco_any_S); - *ndistc += 4*GMX_SIMD_REAL_WIDTH; + *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH; } if (!InRange) { - cjl--; + jclusterLast--; } } - if (cjf <= cjl) + if (jclusterFirst <= jclusterLast) { - for (cj = cjf; cj <= cjl; cj++) + for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++) { /* Store cj and the interaction mask */ - nbl->cj[nbl->ncj].cj = ci_to_cj_simd_4xn(gridj->cell0) + cj; - nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(remove_sub_diag, ci, cj); + nbl->cj[nbl->ncj].cj = ci_to_cj_simd_4xn(gridj->cell0) + jcluster; + 
nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster); nbl->ncj++; } /* Increase the closing index in i super-cell list */ -- 2.11.4.GIT