[gromacs.git] / src / gromacs / taskassignment / findallgputasks.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Defines routine for collecting all GPU tasks found on ranks of a node.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_taskassignment
 */
#include "gmxpre.h"

#include "findallgputasks.h"

#include "config.h"

#include <numeric>
#include <vector>

#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"

namespace gmx
{

namespace
{

//! Constant used to help minimize preprocessing of code.
constexpr bool g_usingMpi = GMX_MPI;
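
// Using a constexpr flag instead of the raw preprocessor symbol lets
// ordinary if() statements guard most of the MPI-dependent logic, so the
// compiler parses and type-checks it even in non-MPI builds; only the
// direct MPI calls below still need #if GMX_MPI.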

//! Helper function to all-gather from each rank of this node its number of GPU tasks, in preparation for gathering the tasks themselves.
static std::vector<int> allgather(const int &input,
                                  int        numRanks,
                                  MPI_Comm   communicator)
{
    std::vector<int> result(numRanks);
    if (g_usingMpi && numRanks > 1)
    {
        // TODO This works as an MPI_Allgather, but thread-MPI does
        // not implement that. It's only intra-node communication, and
        // happens rarely, so not worth optimizing (yet). Also
        // thread-MPI segfaults with 1 rank.
#if GMX_MPI
        int root = 0;
        // Calling a C API with the const T * from data() doesn't seem
        // to compile warning-free with all versions of MPI headers.
        //
        // TODO Make an allgather template to deal with this nonsense.
        MPI_Gather(const_cast<int *>(&input),
                   1,
                   MPI_INT,
                   const_cast<int *>(result.data()),
                   1,
                   MPI_INT,
                   root,
                   communicator);
        MPI_Bcast(const_cast<int *>(result.data()),
                  result.size(),
                  MPI_INT,
                  root,
                  communicator);
#else
        GMX_UNUSED_VALUE(communicator);
#endif
    }
    else
    {
        result[0] = input;
    }
    return result;
}
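
// Example (illustrative only): on a node with three ranks reporting
// 2, 0 and 3 GPU tasks respectively, every rank's result is {2, 0, 3}.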

//! Helper function to compute allgatherv displacements.
static std::vector<int> computeDisplacements(ArrayRef<const int> extentOnEachRank,
                                             int                 numRanks)
{
    std::vector<int> displacements(numRanks + 1);
    displacements[0] = 0;
    std::partial_sum(std::begin(extentOnEachRank), std::end(extentOnEachRank), std::begin(displacements) + 1);
    return displacements;
}
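
// For example (illustrative only): extents {2, 0, 3} yield displacements
// {0, 2, 2, 5}: element i is the offset where rank i's tasks begin in the
// gathered vector, and the final element is the total number of tasks.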

//! Helper function to all-gather the vector of all GPU tasks on ranks of this node.
static std::vector<GpuTask> allgatherv(ArrayRef<const GpuTask> input,
                                       ArrayRef<const int>     extentOnEachRank,
                                       ArrayRef<const int>     displacementForEachRank,
                                       MPI_Comm                communicator)
{
    // Now allocate the vector and do the allgatherv
    int totalExtent = displacementForEachRank.back();

    std::vector<GpuTask> result;
    result.reserve(totalExtent);
    if (g_usingMpi && extentOnEachRank.size() > 1 && totalExtent > 0)
    {
        result.resize(totalExtent);
        // TODO This works as an MPI_Allgatherv, but thread-MPI does
        // not implement that. It's only intra-node communication, and
        // happens rarely, so not worth optimizing (yet). Also
        // thread-MPI segfaults with 1 rank and with zero totalExtent.
#if GMX_MPI
        int root = 0;
        // Calling a C API with the const T * from data() doesn't seem to compile reliably.
        // TODO Make an allgatherv template to deal with this nonsense.
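        // GpuTask values are transmitted as MPI_INT, which requires the
        // enum's underlying type to be int-sized.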
        MPI_Gatherv(const_cast<GpuTask *>(input.data()),
                    input.size(),
                    MPI_INT,
                    const_cast<GpuTask *>(result.data()),
                    const_cast<int *>(extentOnEachRank.data()),
                    const_cast<int *>(displacementForEachRank.data()),
                    MPI_INT,
                    root,
                    communicator);
        MPI_Bcast(const_cast<GpuTask *>(result.data()),
                  result.size(),
                  MPI_INT,
                  root,
                  communicator);
#else
        GMX_UNUSED_VALUE(communicator);
#endif
    }
    else
    {
        for (const auto &gpuTask : input)
        {
            result.push_back(gpuTask);
        }
    }
    return result;
}
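
// Example (illustrative only): if the per-rank task lists are
// {Nonbonded}, {} and {Nonbonded, Pme}, every rank receives the flat
// vector {Nonbonded, Nonbonded, Pme}.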

}   // namespace

/*! \brief Returns container of all tasks on all ranks of this node
 * that are eligible for GPU execution.
 *
 * Performs all the communication necessary to prepare for task
 * assignment. Separating this aspect makes it possible to unit test
 * the logic of task assignment. */
GpuTasksOnRanks
findAllGpuTasksOnThisNode(ArrayRef<const GpuTask> gpuTasksOnThisRank,
                          int                     numRanksOnThisNode,
                          MPI_Comm                communicator)
{
    // Find out how many GPU tasks are on each rank on this node.
    auto numGpuTasksOnEachRankOfThisNode =
        allgather(gpuTasksOnThisRank.size(), numRanksOnThisNode, communicator);

    /* Collect on each rank of this node a vector describing all
     * GPU tasks on this node, in ascending order of rank. This
     * requires a vector allgather. The displacements indicate where
     * the GPU tasks on each rank of this node start and end within
     * the vector. */
    auto displacementsForEachRank = computeDisplacements(numGpuTasksOnEachRankOfThisNode, numRanksOnThisNode);
    auto gpuTasksOnThisNode       = allgatherv(gpuTasksOnThisRank, numGpuTasksOnEachRankOfThisNode,
                                               displacementsForEachRank, communicator);

    /* Next, we re-use the displacements to break up the vector
     * of GPU tasks into something that can be indexed like
     * gpuTasks[rankIndex][taskIndex]. */
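    // For example (illustrative only): with displacements {0, 2, 2, 5},
    // rank 0's tasks occupy indices [0, 2), rank 1 contributes none, and
    // rank 2's tasks occupy indices [2, 5).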
    GpuTasksOnRanks gpuTasksOnRanksOfThisNode;
    // TODO This would be nicer if we had a good abstraction for "pair
    // of iterators that point to adjacent container elements" or
    // "iterator that points to the first of a pair of valid adjacent
    // container elements, or end".
    GMX_ASSERT(displacementsForEachRank.size() > 1, "Even with one rank, there's always both a start and end displacement");
    auto currentDisplacementIt = displacementsForEachRank.begin();
    auto nextDisplacementIt    = currentDisplacementIt + 1;
    do
    {
        gpuTasksOnRanksOfThisNode.emplace_back(std::vector<GpuTask>());
        for (auto taskOnThisRankIndex = *currentDisplacementIt; taskOnThisRankIndex != *nextDisplacementIt; ++taskOnThisRankIndex)
        {
            gpuTasksOnRanksOfThisNode.back().push_back(gpuTasksOnThisNode[taskOnThisRankIndex]);
        }
        currentDisplacementIt = nextDisplacementIt;
        ++nextDisplacementIt;
    }
    while (nextDisplacementIt != displacementsForEachRank.end());

    return gpuTasksOnRanksOfThisNode;
}

} // namespace gmx
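
// Usage sketch (illustrative only; physicalNodeComm is a hypothetical
// node-local communicator): every rank of the node ends up with the same
// per-rank view of the node's GPU tasks.
//
//   auto gpuTasksOnRanksOfThisNode =
//       gmx::findAllGpuTasksOnThisNode(gpuTasksOnThisRank,
//                                      numRanksOnThisNode,
//                                      physicalNodeComm);
//   // gpuTasksOnRanksOfThisNode[rankIndex][taskIndex] is a GpuTask.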