From 7b4d632dd09d0714f65ec48d7380084d65907c4b Mon Sep 17 00:00:00 2001 From: Teemu Murtola Date: Tue, 22 Aug 2017 22:12:04 +0300 Subject: [PATCH] Introduce self-pairs search in nbsearch Make it possible to search for all pairs within a single set of positions using AnalysisNeighborhood. This effectively excludes half of the pairs from the search, speeding things up. Not used yet anywhere, but this makes the code a better reference for performance comparisons, and for places where this is applicable it has potential for speeding things up quite a bit. Change-Id: Ib0e6f36460b8dbda97704447222c864c149d8e56 --- docs/doxygen/user/analysisnbsearch.md | 10 +-- src/gromacs/selection/nbsearch.cpp | 78 ++++++++++++++++--- src/gromacs/selection/nbsearch.h | 29 ++++++- src/gromacs/selection/tests/nbsearch.cpp | 127 ++++++++++++++++++++++++++++--- 4 files changed, 214 insertions(+), 30 deletions(-) diff --git a/docs/doxygen/user/analysisnbsearch.md b/docs/doxygen/user/analysisnbsearch.md index a8b2bf554a..3b46da5de8 100644 --- a/docs/doxygen/user/analysisnbsearch.md +++ b/docs/doxygen/user/analysisnbsearch.md @@ -21,6 +21,8 @@ performance improvement this allows). The main features that it provides: density and not limited by the cutoff. - Transparent fallback to a simple all-pairs search if the cutoff is too long for the algorithm or grid searching is not otherwise supported. + - Support for either N-vs-M pair search with two sets of coordinates, or for + all pairs within a single set of coordinates. - Support for computing all distances in the XY plane only (and still grid-based). - Convenience functions for finding the shortest distance or the nearest pair @@ -83,18 +85,14 @@ PBC information: falls back automatically to an all-pairs search. For correct operation, the grid algorithm needs three cells in each dimension, but the code can fall back to a non-gridded search for each dimension separately. - - If the resulting grid has so few cells that the search would anyways - consider all (or nearly all) cell pairs, the search falls back to a - simple search. - The initialization also pre-calculates the shifts required across the periodic boundaries for triclinic cells, i.e., the fractional number of cells that the grid origin is shifted when crossing the periodic boundary in Y or Z directions. - Finally, all the reference positions are mapped to the grid cells. -There are a few heuristic numbers in the above logic: the average number of -particles within a cell, and the cutover point from grid to an all-pairs -search. These have not been particularly optimized for best performance. +The average number of particles within a cell is somewhat heuristic in the +above logic. This has not been particularly optimized for best performance. When doing the search for test positions, each test position is considered independently: diff --git a/src/gromacs/selection/nbsearch.cpp b/src/gromacs/selection/nbsearch.cpp index cff2a39f85..55a5010ce4 100644 --- a/src/gromacs/selection/nbsearch.cpp +++ b/src/gromacs/selection/nbsearch.cpp @@ -189,6 +189,13 @@ class AnalysisNeighborhoodSearchImpl */ int getGridCellIndex(const ivec cell) const; /*! \brief + * Calculates linear index of a grid cell from fractional coordinates. + * + * \param[in] cell Cell indices (must be within the grid). + * \returns Linear index of \p cell. + */ + int getGridCellIndex(const rvec cell) const; + /*! \brief * Adds an index into a grid cell. * * \param[in] cell Fractional cell coordinates into which \p i should @@ -327,6 +334,7 @@ class AnalysisNeighborhoodPairSearchImpl explicit AnalysisNeighborhoodPairSearchImpl(const AnalysisNeighborhoodSearchImpl &search) : search_(search) { + selfSearchMode_ = false; testPosCount_ = 0; testPositions_ = nullptr; testExclusionIds_ = nullptr; @@ -342,6 +350,8 @@ class AnalysisNeighborhoodPairSearchImpl //! Initializes a search to find reference positions neighboring \p x. void startSearch(const AnalysisNeighborhoodPositions &positions); + //! Initializes a search to find reference position pairs. + void startSelfSearch(); //! Searches for the next neighbor. template bool searchNext(Action action); @@ -358,6 +368,8 @@ class AnalysisNeighborhoodPairSearchImpl //! Parent search object. const AnalysisNeighborhoodSearchImpl &search_; + //! Whether we are searching for ref-ref pairs. + bool selfSearchMode_; //! Number of test positions. int testPosCount_; //! Reference to the test positions. @@ -376,14 +388,16 @@ class AnalysisNeighborhoodPairSearchImpl rvec xtest_; //! Stores the previous returned position during a pair loop. int previ_; - //! Stores the pair distance corresponding to previ_; + //! Stores the pair distance corresponding to previ_. real prevr2_; - //! Stores the shortest distance vector corresponding to previ_; + //! Stores the shortest distance vector corresponding to previ_. rvec prevdx_; //! Stores the current exclusion index during loops. int exclind_; //! Stores the fractional test particle cell location during loops. rvec testcell_; + //! Stores the cell index corresponding to testcell_. + int testCellIndex_; //! Stores the current cell during pair loops. ivec currCell_; //! Stores the current loop upper bounds for each dimension during pair loops. @@ -685,7 +699,7 @@ int AnalysisNeighborhoodSearchImpl::getGridCellIndex(const ivec cell) const + cell[ZZ] * ncelldim_[XX] * ncelldim_[YY]; } -void AnalysisNeighborhoodSearchImpl::addToGridCell(const rvec cell, int i) +int AnalysisNeighborhoodSearchImpl::getGridCellIndex(const rvec cell) const { ivec icell; for (int dd = 0; dd < DIM; ++dd) @@ -705,7 +719,12 @@ void AnalysisNeighborhoodSearchImpl::addToGridCell(const rvec cell, int i) } icell[dd] = cellIndex; } - const int ci = getGridCellIndex(icell); + return getGridCellIndex(icell); +} + +void AnalysisNeighborhoodSearchImpl::addToGridCell(const rvec cell, int i) +{ + const int ci = getGridCellIndex(cell); cells_[ci].push_back(i); } @@ -951,7 +970,13 @@ void AnalysisNeighborhoodSearchImpl::init( void AnalysisNeighborhoodPairSearchImpl::reset(int testIndex) { - testIndex_ = testIndex; + testIndex_ = testIndex; + testCellIndex_ = -1; + previ_ = -1; + prevr2_ = 0.0; + clear_rvec(prevdx_); + exclind_ = 0; + prevcai_ = -1; if (testIndex_ >= 0 && testIndex_ < testPosCount_) { const int index = @@ -962,10 +987,18 @@ void AnalysisNeighborhoodPairSearchImpl::reset(int testIndex) search_.initCellRange(testcell_, currCell_, cellBound_, ZZ); search_.initCellRange(testcell_, currCell_, cellBound_, YY); search_.initCellRange(testcell_, currCell_, cellBound_, XX); + if (selfSearchMode_) + { + testCellIndex_ = search_.getGridCellIndex(testcell_); + } } else { copy_rvec(testPositions_[index], xtest_); + if (selfSearchMode_) + { + previ_ = testIndex_; + } } if (search_.excls_ != nullptr) { @@ -983,11 +1016,6 @@ void AnalysisNeighborhoodPairSearchImpl::reset(int testIndex) } } } - previ_ = -1; - prevr2_ = 0.0; - clear_rvec(prevdx_); - exclind_ = 0; - prevcai_ = -1; } void AnalysisNeighborhoodPairSearchImpl::nextTestPosition() @@ -1022,6 +1050,7 @@ bool AnalysisNeighborhoodPairSearchImpl::isExcluded(int j) void AnalysisNeighborhoodPairSearchImpl::startSearch( const AnalysisNeighborhoodPositions &positions) { + selfSearchMode_ = false; testPosCount_ = positions.count_; testPositions_ = positions.x_; testExclusionIds_ = positions.exclusionIds_; @@ -1041,6 +1070,18 @@ void AnalysisNeighborhoodPairSearchImpl::startSearch( } } +void AnalysisNeighborhoodPairSearchImpl::startSelfSearch() +{ + selfSearchMode_ = true; + testPosCount_ = search_.nref_; + testPositions_ = search_.xref_; + testExclusionIds_ = search_.refExclusionIds_; + testIndices_ = search_.refIndices_; + GMX_RELEASE_ASSERT(search_.excls_ == nullptr || testIndices_ == nullptr, + "Exclusion IDs not implemented with indexed ref positions"); + reset(0); +} + template bool AnalysisNeighborhoodPairSearchImpl::searchNext(Action action) { @@ -1054,10 +1095,18 @@ bool AnalysisNeighborhoodPairSearchImpl::searchNext(Action action) { rvec shift; const int ci = search_.shiftCell(currCell_, shift); + if (selfSearchMode_ && ci > testCellIndex_) + { + continue; + } const int cellSize = static_cast(search_.cells_[ci].size()); for (; cai < cellSize; ++cai) { const int i = search_.cells_[ci][cai]; + if (selfSearchMode_ && ci == testCellIndex_ && i >= testIndex_) + { + continue; + } if (isExcluded(i)) { continue; @@ -1379,6 +1428,15 @@ AnalysisNeighborhoodSearch::nearestPoint( } AnalysisNeighborhoodPairSearch +AnalysisNeighborhoodSearch::startSelfPairSearch() const +{ + GMX_RELEASE_ASSERT(impl_, "Accessing an invalid search object"); + Impl::PairSearchImplPointer pairSearch(impl_->getPairSearch()); + pairSearch->startSelfSearch(); + return AnalysisNeighborhoodPairSearch(pairSearch); +} + +AnalysisNeighborhoodPairSearch AnalysisNeighborhoodSearch::startPairSearch( const AnalysisNeighborhoodPositions &positions) const { diff --git a/src/gromacs/selection/nbsearch.h b/src/gromacs/selection/nbsearch.h index 8a4c3a29db..0561822a68 100644 --- a/src/gromacs/selection/nbsearch.h +++ b/src/gromacs/selection/nbsearch.h @@ -415,8 +415,8 @@ class AnalysisNeighborhoodPair * variable (see sm_distance.cpp). * * \todo - * Consider merging nearestPoint() and minimumDistance() by adding the distance - * to AnalysisNeighborhoodPair. + * Consider removing minimumDistance(), as nearestPoint() already returns the + * distance. * * \inpublicapi * \ingroup module_selection @@ -470,7 +470,7 @@ class AnalysisNeighborhoodSearch AnalysisNeighborhood::SearchMode mode() const; /*! \brief - * Check whether a point is within a neighborhood. + * Checks whether a point is within a neighborhood. * * \param[in] positions Set of test positions to use. * \returns true if any of the test positions is within the cutoff of @@ -500,12 +500,33 @@ class AnalysisNeighborhoodSearch nearestPoint(const AnalysisNeighborhoodPositions &positions) const; /*! \brief - * Start a search to find reference positions within a cutoff. + * Starts a search to find all reference position pairs within a cutoff. + * + * \returns Initialized search object to loop through all reference + * position pairs within the configured cutoff. + * \throws std::bad_alloc if out of memory. + * + * This works as if the reference positions were passed to + * startPairSearch(), except that it only returns each pair once, + * instead of returning both i-j and j-i pairs, as startPairSearch() + * does. i-i pairs are not returned. Note that the order of ref/test + * indices in the returned pairs is not predictable. That is, one of + * i-j or j-i is always returned, but there is no control which one. + */ + AnalysisNeighborhoodPairSearch + startSelfPairSearch() const; + + /*! \brief + * Starts a search to find reference positions within a cutoff. * * \param[in] positions Set of test positions to use. * \returns Initialized search object to loop through all reference * positions within the configured cutoff. * \throws std::bad_alloc if out of memory. + * + * If you want to pass the same positions here as you used for the + * reference positions, consider using startSelfPairSearch(). + * It can be up to 50% faster. */ AnalysisNeighborhoodPairSearch startPairSearch(const AnalysisNeighborhoodPositions &positions) const; diff --git a/src/gromacs/selection/tests/nbsearch.cpp b/src/gromacs/selection/tests/nbsearch.cpp index 3a5f875154..dae59c0ba0 100644 --- a/src/gromacs/selection/tests/nbsearch.cpp +++ b/src/gromacs/selection/tests/nbsearch.cpp @@ -153,6 +153,7 @@ class NeighborhoodSearchTestData std::vector generateIndex(int count, gmx_uint64_t seed) const; void generateRandomRefPositions(int count); void generateRandomTestPositions(int count); + void useRefPositionsAsTestPositions(); void computeReferences(t_pbc *pbc) { computeReferencesInternal(pbc, false); @@ -259,6 +260,15 @@ void NeighborhoodSearchTestData::generateRandomTestPositions(int count) } } +void NeighborhoodSearchTestData::useRefPositionsAsTestPositions() +{ + testPositions_.reserve(refPosCount_); + for (const auto &refPos : refPos_) + { + addTestPosition(refPos); + } +} + void NeighborhoodSearchTestData::computeReferencesInternal(t_pbc *pbc, bool bXY) { real cutoff = cutoff_; @@ -292,7 +302,7 @@ void NeighborhoodSearchTestData::computeReferencesInternal(t_pbc *pbc, bool bXY) testPos.refMinDist = dist; testPos.refNearestPoint = j; } - if (dist <= cutoff) + if (dist > 0 && dist <= cutoff) { RefPair pair(j, dist); GMX_RELEASE_ASSERT(testPos.refPairs.empty() || testPos.refPairs.back() < pair, @@ -425,7 +435,8 @@ class NeighborhoodSearchTest : public ::testing::Test const gmx::AnalysisNeighborhoodPositions &pos, const t_blocka *excls, const gmx::ConstArrayRef &refIndices, - const gmx::ConstArrayRef &testIndices); + const gmx::ConstArrayRef &testIndices, + bool selfPairs); gmx::AnalysisNeighborhood nb_; }; @@ -519,7 +530,7 @@ void NeighborhoodSearchTest::testPairSearch( const NeighborhoodSearchTestData &data) { testPairSearchFull(search, data, data.testPositions(), nullptr, - gmx::EmptyArrayRef(), gmx::EmptyArrayRef()); + gmx::EmptyArrayRef(), gmx::EmptyArrayRef(), false); } void NeighborhoodSearchTest::testPairSearchIndexed( @@ -533,7 +544,7 @@ void NeighborhoodSearchTest::testPairSearchIndexed( nb->initSearch(&data.pbc_, data.refPositions().indexed(refIndices)); testPairSearchFull(&search, data, data.testPositions(), nullptr, - refIndices, testIndices); + refIndices, testIndices, false); } void NeighborhoodSearchTest::testPairSearchFull( @@ -542,12 +553,12 @@ void NeighborhoodSearchTest::testPairSearchFull( const gmx::AnalysisNeighborhoodPositions &pos, const t_blocka *excls, const gmx::ConstArrayRef &refIndices, - const gmx::ConstArrayRef &testIndices) + const gmx::ConstArrayRef &testIndices, + bool selfPairs) { std::map refPairs; // TODO: Some parts of this code do not work properly if pos does not // initially contain all the test positions. - gmx::AnalysisNeighborhoodPositions posCopy(pos); if (testIndices.empty()) { for (size_t i = 0; i < data.testPositions_.size(); ++i) @@ -561,10 +572,10 @@ void NeighborhoodSearchTest::testPairSearchFull( { refPairs[index] = data.testPositions_[index].refPairs; } - posCopy.indexed(testIndices); } if (excls != nullptr) { + GMX_RELEASE_ASSERT(!selfPairs, "Self-pairs testing not implemented with exclusions"); for (auto &entry : refPairs) { const int testIndex = entry.first; @@ -573,6 +584,7 @@ void NeighborhoodSearchTest::testPairSearchFull( } if (!refIndices.empty()) { + GMX_RELEASE_ASSERT(!selfPairs, "Self-pairs testing not implemented with indexing"); for (auto &entry : refPairs) { for (auto &refPair : entry.second) @@ -598,8 +610,15 @@ void NeighborhoodSearchTest::testPairSearchFull( } } + gmx::AnalysisNeighborhoodPositions posCopy(pos); + if (!testIndices.empty()) + { + posCopy.indexed(testIndices); + } gmx::AnalysisNeighborhoodPairSearch pairSearch - = search->startPairSearch(posCopy); + = selfPairs + ? search->startSelfPairSearch() + : search->startPairSearch(posCopy); gmx::AnalysisNeighborhoodPair pair; while (pairSearch.findNextPair(&pair)) { @@ -655,6 +674,16 @@ void NeighborhoodSearchTest::testPairSearchFull( EXPECT_REAL_EQ_TOL(foundRefPair->distance, searchPair.distance, data.relativeTolerance()) << "Distance computed by the neighborhood search does not match."; + if (selfPairs) + { + searchPair = NeighborhoodSearchTestData::RefPair(testIndex, 0.0); + const auto otherRefPair + = std::lower_bound(refPairs[refIndex].begin(), refPairs[refIndex].end(), + searchPair); + GMX_RELEASE_ASSERT(otherRefPair != refPairs[refIndex].end(), + "Precomputed reference data is not symmetric"); + otherRefPair->bFound = true; + } } } @@ -696,6 +725,30 @@ class TrivialTestData NeighborhoodSearchTestData data_; }; +class TrivialSelfPairsTestData +{ + public: + static const NeighborhoodSearchTestData &get() + { + static TrivialSelfPairsTestData singleton; + return singleton.data_; + } + + TrivialSelfPairsTestData() : data_(12345, 1.0) + { + data_.box_[XX][XX] = 3.0; + data_.box_[YY][YY] = 3.0; + data_.box_[ZZ][ZZ] = 3.0; + data_.generateRandomRefPositions(20); + data_.useRefPositionsAsTestPositions(); + set_pbc(&data_.pbc_, epbcXYZ, data_.box_); + data_.computeReferences(&data_.pbc_); + } + + private: + NeighborhoodSearchTestData data_; +}; + class TrivialNoPBCTestData { public: @@ -742,6 +795,30 @@ class RandomBoxFullPBCData NeighborhoodSearchTestData data_; }; +class RandomBoxSelfPairsData +{ + public: + static const NeighborhoodSearchTestData &get() + { + static RandomBoxSelfPairsData singleton; + return singleton.data_; + } + + RandomBoxSelfPairsData() : data_(12345, 1.0) + { + data_.box_[XX][XX] = 10.0; + data_.box_[YY][YY] = 5.0; + data_.box_[ZZ][ZZ] = 7.0; + data_.generateRandomRefPositions(1000); + data_.useRefPositionsAsTestPositions(); + set_pbc(&data_.pbc_, epbcXYZ, data_.box_); + data_.computeReferences(&data_.pbc_); + } + + private: + NeighborhoodSearchTestData data_; +}; + class RandomBoxXYFullPBCData { public: @@ -967,6 +1044,34 @@ TEST_F(NeighborhoodSearchTest, GridSearchXYBox) testPairSearch(&search, data); } +TEST_F(NeighborhoodSearchTest, SimpleSelfPairsSearch) +{ + const NeighborhoodSearchTestData &data = TrivialSelfPairsTestData::get(); + + nb_.setCutoff(data.cutoff_); + nb_.setMode(gmx::AnalysisNeighborhood::eSearchMode_Simple); + gmx::AnalysisNeighborhoodSearch search = + nb_.initSearch(&data.pbc_, data.refPositions()); + ASSERT_EQ(gmx::AnalysisNeighborhood::eSearchMode_Simple, search.mode()); + + testPairSearchFull(&search, data, data.testPositions(), nullptr, + gmx::EmptyArrayRef(), gmx::EmptyArrayRef(), true); +} + +TEST_F(NeighborhoodSearchTest, GridSelfPairsSearch) +{ + const NeighborhoodSearchTestData &data = RandomBoxSelfPairsData::get(); + + nb_.setCutoff(data.cutoff_); + nb_.setMode(gmx::AnalysisNeighborhood::eSearchMode_Grid); + gmx::AnalysisNeighborhoodSearch search = + nb_.initSearch(&data.pbc_, data.refPositions()); + ASSERT_EQ(gmx::AnalysisNeighborhood::eSearchMode_Grid, search.mode()); + + testPairSearchFull(&search, data, data.testPositions(), nullptr, + gmx::EmptyArrayRef(), gmx::EmptyArrayRef(), true); +} + TEST_F(NeighborhoodSearchTest, HandlesConcurrentSearches) { const NeighborhoodSearchTestData &data = TrivialTestData::get(); @@ -1081,7 +1186,8 @@ TEST_F(NeighborhoodSearchTest, SimpleSearchExclusions) testPairSearchFull(&search, data, data.testPositions().exclusionIds(helper.testPosIds()), - helper.exclusions(), gmx::EmptyArrayRef(), gmx::EmptyArrayRef()); + helper.exclusions(), gmx::EmptyArrayRef(), + gmx::EmptyArrayRef(), false); } TEST_F(NeighborhoodSearchTest, GridSearchExclusions) @@ -1101,7 +1207,8 @@ TEST_F(NeighborhoodSearchTest, GridSearchExclusions) testPairSearchFull(&search, data, data.testPositions().exclusionIds(helper.testPosIds()), - helper.exclusions(), gmx::EmptyArrayRef(), gmx::EmptyArrayRef()); + helper.exclusions(), gmx::EmptyArrayRef(), + gmx::EmptyArrayRef(), false); } } // namespace -- 2.11.4.GIT