/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Defines routine for collecting all GPU tasks found on ranks of a node.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_taskassignment
 */
#include "gmxpre.h"

#include "findallgputasks.h"

#include "config.h"

#include <numeric>
#include <vector>

#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"

namespace gmx
{

namespace
{

//! Constant used to help minimize preprocessing of code.
constexpr bool g_usingMpi = GMX_MPI;
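
// Note on the design choice above: testing a constexpr bool in
// "if (g_usingMpi && ...)" keeps both branches visible to (and
// syntax-checked by) the compiler, while the dead branch is still
// eliminated at compile time. Only the MPI calls themselves need to
// stay behind #if GMX_MPI guards.
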
//! Helper function to prepare to all-gather the vector of non-bonded tasks on this node.
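//! For illustration: if three ranks call this with inputs 2, 0 and 3
//! respectively, every rank receives the vector {2, 0, 3}, ordered by rank.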
static std::vector<int> allgather(const int &input,
                                  int        numRanks,
                                  MPI_Comm   communicator)
{
    std::vector<int> result(numRanks);
    if (g_usingMpi && numRanks > 1)
    {
        // TODO This works as an MPI_Allgather, but thread-MPI does
        // not implement that. It's only intra-node communication, and
        // happens rarely, so not worth optimizing (yet). Also
        // thread-MPI segfaults with 1 rank.
#if GMX_MPI
        int root = 0;
        // Calling a C API with the const T * from data() doesn't seem
        // to compile warning-free with all versions of MPI headers.
        //
        // TODO Make an allgather template to deal with this nonsense.
        MPI_Gather(const_cast<int *>(&input),
                   1,
                   MPI_INT,
                   const_cast<int *>(result.data()),
                   1,
                   MPI_INT,
                   root,
                   communicator);
        MPI_Bcast(const_cast<int *>(result.data()),
                  numRanks,
                  MPI_INT,
                  root,
                  communicator);
#else
        GMX_UNUSED_VALUE(communicator);
#endif
    }
    else
    {
        result[0] = input;
    }

    return result;
}

//! Helper function to compute allgatherv displacements.
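//! For example, per-rank extents {2, 0, 3} yield displacements
//! {0, 2, 2, 5}: rank r's tasks occupy indices [displacements[r],
//! displacements[r+1]) of the gathered vector, and the final element
//! is the total number of tasks.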
static std::vector<int> computeDisplacements(ArrayRef<const int> extentOnEachRank,
                                             int                 numRanks)
{
    std::vector<int> displacements(numRanks + 1);
    displacements[0] = 0;
    std::partial_sum(std::begin(extentOnEachRank), std::end(extentOnEachRank), std::begin(displacements) + 1);
    return displacements;
}

//! Helper function to all-gather the vector of all GPU tasks on ranks of this node.
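//! Continuing the example above: with per-rank extents {2, 0, 3} and
//! displacements {0, 2, 2, 5}, every rank receives the same vector of
//! all five tasks, ordered by the rank that contributed them.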
static std::vector<GpuTask> allgatherv(ArrayRef<const GpuTask> input,
                                       ArrayRef<const int>     extentOnEachRank,
                                       ArrayRef<const int>     displacementForEachRank,
                                       MPI_Comm                communicator)
{
    // Now allocate the vector and do the allgatherv
    int totalExtent = displacementForEachRank.back();

    std::vector<GpuTask> result;
    result.reserve(totalExtent);
    if (g_usingMpi && extentOnEachRank.size() > 1 && totalExtent > 0)
    {
        result.resize(totalExtent);
        // TODO This works as an MPI_Allgatherv, but thread-MPI does
        // not implement that. It's only intra-node communication, and
        // happens rarely, so not worth optimizing (yet). Also
        // thread-MPI segfaults with 1 rank and with zero totalExtent.
#if GMX_MPI
        int root = 0;
        // Calling a C API with the const T * from data() doesn't seem to compile reliably.
        // TODO Make an allgatherv template to deal with this nonsense.
        MPI_Gatherv(const_cast<GpuTask *>(input.data()),
                    input.size(),
                    MPI_INT,
                    const_cast<GpuTask *>(result.data()),
                    const_cast<int *>(extentOnEachRank.data()),
                    const_cast<int *>(displacementForEachRank.data()),
                    MPI_INT,
                    root,
                    communicator);
        MPI_Bcast(const_cast<GpuTask *>(result.data()),
                  totalExtent,
                  MPI_INT,
                  root,
                  communicator);
#else
        GMX_UNUSED_VALUE(communicator);
#endif
    }
    else
    {
        for (const auto &gpuTask : input)
        {
            result.push_back(gpuTask);
        }
    }
    return result;
}

}   // namespace

/*! \brief Returns container of all tasks on all ranks of this node
 * that are eligible for GPU execution.
 *
 * Perform all necessary communication for preparing for task
 * assignment. Separating this aspect makes it possible to unit test
 * the logic of task assignment. */
GpuTasksOnRanks
findAllGpuTasksOnThisNode(ArrayRef<const GpuTask> gpuTasksOnThisRank,
                          int                     numRanksOnThisNode,
                          MPI_Comm                communicator)
{
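    // Illustrative shape of the result: for three ranks with 2, 0 and 3
    // GPU tasks respectively, the return value is {{t0, t1}, {}, {t2, t3, t4}}
    // (t0..t4 being the gathered tasks), indexable as
    // gpuTasks[rankIndex][taskIndex].
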
    // Find out how many GPU tasks are on each rank on this node.
    auto numGpuTasksOnEachRankOfThisNode =
        allgather(gpuTasksOnThisRank.size(), numRanksOnThisNode, communicator);

    /* Collect on each rank of this node a vector describing all
     * GPU tasks on this node, in ascending order of rank. This
     * requires a vector allgather. The displacements indicate where
     * the GPU tasks on each rank of this node start and end within
     * the vector. */
    auto displacementsForEachRank = computeDisplacements(numGpuTasksOnEachRankOfThisNode, numRanksOnThisNode);
    auto gpuTasksOnThisNode       = allgatherv(gpuTasksOnThisRank, numGpuTasksOnEachRankOfThisNode,
                                               displacementsForEachRank, communicator);

    /* Next, we re-use the displacements to break up the vector
     * of GPU tasks into something that can be indexed like
     * gpuTasks[rankIndex][taskIndex]. */
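    // For example, with displacements {0, 2, 2, 5} the loop below makes
    // three inner vectors, holding tasks [0, 2), [2, 2) (i.e. none) and
    // [2, 5) of gpuTasksOnThisNode, respectively.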
    GpuTasksOnRanks gpuTasksOnRanksOfThisNode;
    // TODO This would be nicer if we had a good abstraction for "pair
    // of iterators that point to adjacent container elements" or
    // "iterator that points to the first of a pair of valid adjacent
    // container elements, or end".
    GMX_ASSERT(displacementsForEachRank.size() > 1, "Even with one rank, there's always both a start and end displacement");
    auto currentDisplacementIt = displacementsForEachRank.begin();
    auto nextDisplacementIt    = currentDisplacementIt + 1;
    do
    {
        gpuTasksOnRanksOfThisNode.emplace_back(std::vector<GpuTask>());
        for (auto taskOnThisRankIndex = *currentDisplacementIt;
             taskOnThisRankIndex != *nextDisplacementIt;
             ++taskOnThisRankIndex)
        {
            gpuTasksOnRanksOfThisNode.back().push_back(gpuTasksOnThisNode[taskOnThisRankIndex]);
        }

        currentDisplacementIt = nextDisplacementIt;
        ++nextDisplacementIt;
    }
    while (nextDisplacementIt != displacementsForEachRank.end());

    return gpuTasksOnRanksOfThisNode;
}

} // namespace gmx