Fix undefined behavior flagged by UBSAN
[gromacs.git] / src/gromacs/mdrunutility/threadaffinity.cpp
blob 8cf377acbf160ab0ee2c720f86bea938eecc880b
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "threadaffinity.h"

#include "config.h"

#include <cerrno>
#include <cstdio>
#include <cstring>

#if HAVE_SCHED_AFFINITY
#    include <sched.h>
#endif

#include "thread_mpi/threads.h"

#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxomp.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/physicalnodecommunicator.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/unique_cptr.h"
namespace
{

class DefaultThreadAffinityAccess : public gmx::IThreadAffinityAccess
{
public:
    bool isThreadAffinitySupported() const override
    {
        return tMPI_Thread_setaffinity_support() == TMPI_SETAFFINITY_SUPPORT_YES;
    }
    bool setCurrentThreadAffinityToCore(int core) override
    {
        const int ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
        return ret == 0;
    }
};

//! Global instance of DefaultThreadAffinityAccess
DefaultThreadAffinityAccess g_defaultAffinityAccess;

} // namespace

gmx::IThreadAffinityAccess::~IThreadAffinityAccess() {}
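
/* Returns whether the locally detected problem is present on any rank of the
 * simulation. The reduced result is only available on the master rank; all
 * other ranks simply return their local value. */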
static bool invalidWithinSimulation(const t_commrec* cr, bool invalidLocally)
{
#if GMX_MPI
    if (cr->nnodes > 1)
    {
        int value = invalidLocally ? 1 : 0;
        int globalValue;
        MPI_Reduce(&value, &globalValue, 1, MPI_INT, MPI_LOR, MASTERRANK(cr), cr->mpi_comm_mysim);
        return SIMMASTER(cr) ? (globalValue != 0) : invalidLocally;
    }
#else
    GMX_UNUSED_VALUE(cr);
#endif
    return invalidLocally;
}
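
/* Determines the thread pinning layout for this node: fills *pin_stride (when
 * requested automatically) and *localityOrder, and returns whether a valid
 * layout was found. When it was not, a note or warning has been logged and
 * *issuedWarning reports whether this rank issued it. */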
static bool get_thread_affinity_layout(const gmx::MDLogger&         mdlog,
                                       const t_commrec*             cr,
                                       const gmx::HardwareTopology& hwTop,
                                       int                          threads,
                                       bool  affinityIsAutoAndNumThreadsIsNotAuto,
                                       int   pin_offset,
                                       int*  pin_stride,
                                       int** localityOrder,
                                       bool* issuedWarning)
{
    int  hwThreads;
    int  hwThreadsPerCore = 0;
    bool bPickPinStride;
    bool haveTopology;
    bool invalidValue;

    haveTopology = (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic);

    if (pin_offset < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning offset requested");
    }
    if (*pin_stride < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning stride requested");
    }

    if (haveTopology)
    {
        hwThreads = hwTop.machine().logicalProcessorCount;
        // Just use the value for the first core
        hwThreadsPerCore = hwTop.machine().sockets[0].cores[0].hwThreads.size();
        snew(*localityOrder, hwThreads);
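        /* Fill localityOrder so that hardware threads sharing a core, and
         * cores sharing a socket, end up adjacent in the pinning order:
         * socket 0 core 0 first, then socket 0 core 1, and so on. */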
        int i = 0;
        for (auto& s : hwTop.machine().sockets)
        {
            for (auto& c : s.cores)
            {
                for (auto& t : c.hwThreads)
                {
                    (*localityOrder)[i++] = t.logicalProcessorId;
                }
            }
        }
    }
    else
    {
        /* topology information not available or invalid, ignore it */
        hwThreads      = hwTop.machine().logicalProcessorCount;
        *localityOrder = nullptr;
    }
    // Only warn about the first problem per node. Otherwise, the first test
    // failing would essentially always cause the other problems to be
    // reported as well, leading to bogus warnings. The order in the
    // conditionals with this variable is important, since the MPI_Reduce()
    // in invalidWithinSimulation() needs to always happen.
    bool alreadyWarned = false;
    invalidValue       = (hwThreads <= 0);
    if (invalidWithinSimulation(cr, invalidValue))
    {
        /* We don't know anything about the hardware, don't pin */
        GMX_LOG(mdlog.warning)
                .asParagraph()
                .appendText("NOTE: No information on available cores, thread pinning disabled.");
        alreadyWarned = true;
    }
    bool validLayout = !invalidValue;

    if (affinityIsAutoAndNumThreadsIsNotAuto)
    {
        invalidValue = (threads != hwThreads);
        bool warn    = (invalidValue && threads > 1 && threads < hwThreads);
        if (invalidWithinSimulation(cr, warn) && !alreadyWarned)
        {
            GMX_LOG(mdlog.warning)
                    .asParagraph()
                    .appendText(
                            "NOTE: The number of threads is not equal to the number of (logical) "
                            "cores\n"
                            "      and the -pin option is set to auto: will not pin threads to "
                            "cores.\n"
                            "      This can lead to significant performance degradation.\n"
                            "      Consider using -pin on (and -pinoffset in case you run multiple "
                            "jobs).");
            alreadyWarned = true;
        }
        validLayout = validLayout && !invalidValue;
    }

    invalidValue = (threads > hwThreads);
    if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
    {
        GMX_LOG(mdlog.warning)
                .asParagraph()
                .appendText("NOTE: Oversubscribing the CPU, will not pin threads");
        alreadyWarned = true;
    }
    validLayout = validLayout && !invalidValue;

    invalidValue = (pin_offset + threads > hwThreads);
    if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
    {
        GMX_LOG(mdlog.warning)
                .asParagraph()
                .appendText(
                        "WARNING: Requested offset too large for available cores, thread pinning "
                        "disabled.");
        alreadyWarned = true;
    }
    validLayout = validLayout && !invalidValue;

    invalidValue = false;
    /* do we need to choose the pinning stride? */
    bPickPinStride = (*pin_stride == 0);

    if (bPickPinStride)
    {
        if (haveTopology && pin_offset + threads * hwThreadsPerCore <= hwThreads)
        {
            /* Put one thread on each physical core */
            *pin_stride = hwThreadsPerCore;
        }
        else
        {
            /* We don't know if we have SMT, and if we do, we don't know
             * if hw threads in the same physical core are consecutive.
             * Without SMT the pinning layout should not matter too much,
             * so we assume a consecutive layout and maximally spread out
             * the threads at equal threads per core.
             * Note that IBM is the major non-x86 case with cpuid support
             * and probably threads are already pinned by the queuing system,
             * so we wouldn't end up here in the first place.
             */
            *pin_stride = (hwThreads - pin_offset) / threads;
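            /* Illustrative example: with 16 hardware threads, pin_offset 0 and
             * 4 threads, the automatic stride becomes 4, so the threads are
             * pinned to (locality-ordered) indices 0, 4, 8 and 12. */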
        }
    }
    else
    {
        /* Check the placement of the thread with the largest index to make sure
         * that the offset & stride doesn't cause pinning beyond the last hardware thread. */
        invalidValue = (pin_offset + (threads - 1) * (*pin_stride) >= hwThreads);
    }
    if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
    {
        /* We are oversubscribing, don't pin */
        GMX_LOG(mdlog.warning)
                .asParagraph()
                .appendText(
                        "WARNING: Requested stride too large for available cores, thread pinning "
                        "disabled.");
        alreadyWarned = true;
    }
    validLayout = validLayout && !invalidValue;

    if (validLayout)
    {
        GMX_LOG(mdlog.info)
                .appendTextFormatted("Pinning threads with a%s logical core stride of %d",
                                     bPickPinStride ? "n auto-selected" : " user-specified",
                                     *pin_stride);
    }

    *issuedWarning = alreadyWarned;

    return validLayout;
}
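
/* Pins each OpenMP thread of this rank to one hardware thread, using the
 * intra-node thread offset, the pinning offset and stride, and, when
 * available, the topology-based locality order. Returns whether affinity
 * was set successfully for all threads. */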
static bool set_affinity(const t_commrec*            cr,
                         int                         nthread_local,
                         int                         intraNodeThreadOffset,
                         int                         offset,
                         int                         core_pinning_stride,
                         const int*                  localityOrder,
                         gmx::IThreadAffinityAccess* affinityAccess)
{
    // Set the per-thread affinity. In order to be able to check the success
    // of affinity settings, we will set nth_affinity_set to 1 on threads
    // where the affinity setting succeeded and to 0 where it failed.
    // Reducing these 0/1 values over the threads will give the total number
    // of threads on which we succeeded.

    // To avoid warnings from the static analyzer we initialize nth_affinity_set
    // to zero outside the OpenMP block, and then add to it inside the block.
    // The value will still always be 0 or 1 from each thread.
    int nth_affinity_set = 0;
#pragma omp parallel num_threads(nthread_local) reduction(+ : nth_affinity_set)
    {
        try
        {
            int thread_id, thread_id_node;
            int index, core;

            thread_id      = gmx_omp_get_thread_num();
            thread_id_node = intraNodeThreadOffset + thread_id;
            index          = offset + thread_id_node * core_pinning_stride;
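            /* Illustrative example: with offset 2, stride 2 and this being
             * node-wide thread number 3, index becomes 2 + 3*2 = 8, and the
             * thread is pinned to hardware thread 8 (or to localityOrder[8]
             * when topology information is available). */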
            if (localityOrder != nullptr)
            {
                core = localityOrder[index];
            }
            else
            {
                core = index;
            }

            const bool ret = affinityAccess->setCurrentThreadAffinityToCore(core);

            /* store the per-thread success-values of the setaffinity */
            nth_affinity_set += (ret ? 1 : 0);

            if (debug)
            {
                fprintf(debug,
                        "On rank %2d, thread %2d, index %2d, core %2d the affinity setting "
                        "returned %d\n",
                        cr->nodeid, gmx_omp_get_thread_num(), index, core, ret ? 1 : 0);
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }

    if (nth_affinity_set > nthread_local)
    {
        char msg[STRLEN];

        sprintf(msg,
                "Looks like we have set affinity for more threads than "
                "we have (%d > %d)!\n",
                nth_affinity_set, nthread_local);
        gmx_incons(msg);
    }

    /* check & warn if some threads failed to set their affinities */
    const bool allAffinitiesSet = (nth_affinity_set == nthread_local);
    if (!allAffinitiesSet)
    {
        char sbuf1[STRLEN], sbuf2[STRLEN];

        /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
        sbuf1[0] = sbuf2[0] = '\0';
        /* Only add rank info if we have more than one rank. */
        if (cr->nnodes > 1)
        {
#if GMX_MPI
#    if GMX_THREAD_MPI
            sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
#    else /* GMX_LIB_MPI */
            sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
#    endif
#endif /* GMX_MPI */
        }

        if (nthread_local > 1)
        {
            sprintf(sbuf2, "for %d/%d thread%s ", nthread_local - nth_affinity_set, nthread_local,
                    nthread_local > 1 ? "s" : "");
        }

        // TODO: This output should also go through mdlog.
        fprintf(stderr, "NOTE: %sAffinity setting %sfailed.\n", sbuf1, sbuf2);
    }
    return allAffinitiesSet;
}
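
/* Computes the total number of threads on this physical node and this rank's
 * cumulative thread offset within the node (an exclusive prefix sum of the
 * per-rank thread counts). */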
void analyzeThreadsOnThisNode(const gmx::PhysicalNodeCommunicator& physicalNodeComm,
                              int                                  numThreadsOnThisRank,
                              int*                                 numThreadsOnThisNode,
                              int*                                 intraNodeThreadOffset)
{
    *intraNodeThreadOffset = 0;
    *numThreadsOnThisNode  = numThreadsOnThisRank;
#if GMX_MPI
    if (physicalNodeComm.size_ > 1)
    {
        /* We need to determine a scan of the thread counts in this
         * compute node. */
        MPI_Scan(&numThreadsOnThisRank, intraNodeThreadOffset, 1, MPI_INT, MPI_SUM, physicalNodeComm.comm_);
        /* MPI_Scan is inclusive, but here we need exclusive */
        *intraNodeThreadOffset -= numThreadsOnThisRank;
        /* Get the total number of threads on this physical node */
        MPI_Allreduce(&numThreadsOnThisRank, numThreadsOnThisNode, 1, MPI_INT, MPI_SUM,
                      physicalNodeComm.comm_);
    }
#else
    GMX_UNUSED_VALUE(physicalNodeComm);
#endif
}
/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU affinity is set by default.
   But the default assignment does not work (well) when only some ranks
   have threads, which causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
*/
void gmx_set_thread_affinity(const gmx::MDLogger&         mdlog,
                             const t_commrec*             cr,
                             const gmx_hw_opt_t*          hw_opt,
                             const gmx::HardwareTopology& hwTop,
                             int                          numThreadsOnThisRank,
                             int                          numThreadsOnThisNode,
                             int                          intraNodeThreadOffset,
                             gmx::IThreadAffinityAccess*  affinityAccess)
{
    int* localityOrder = nullptr;

    if (hw_opt->threadAffinity == ThreadAffinity::Off)
    {
        /* Nothing to do */
        return;
    }

    if (affinityAccess == nullptr)
    {
        affinityAccess = &g_defaultAffinityAccess;
    }

    /* If the tMPI thread affinity setting is not supported, encourage the user
     * to report it, as it is either a bug or an exotic platform which we might
     * want to support. */
    if (!affinityAccess->isThreadAffinitySupported())
    {
        /* We know Mac OS does not support setting thread affinity, so there's
           no point in warning the user in that case. In any other case
           the user might be able to do something about it. */
#if !defined(__APPLE__)
        GMX_LOG(mdlog.warning)
                .asParagraph()
                .appendText("NOTE: Cannot set thread affinities on the current platform.");
#endif /* __APPLE__ */
        return;
    }

    int offset              = hw_opt->core_pinning_offset;
    int core_pinning_stride = hw_opt->core_pinning_stride;
    if (offset != 0)
    {
        GMX_LOG(mdlog.warning).appendTextFormatted("Applying core pinning offset %d", offset);
    }

    bool affinityIsAutoAndNumThreadsIsNotAuto =
            (hw_opt->threadAffinity == ThreadAffinity::Auto && !hw_opt->totNumThreadsIsAuto);
    bool issuedWarning;
    bool validLayout = get_thread_affinity_layout(
            mdlog, cr, hwTop, numThreadsOnThisNode, affinityIsAutoAndNumThreadsIsNotAuto, offset,
            &core_pinning_stride, &localityOrder, &issuedWarning);
    const gmx::sfree_guard localityOrderGuard(localityOrder);

    bool allAffinitiesSet;
    if (validLayout)
    {
        allAffinitiesSet = set_affinity(cr, numThreadsOnThisRank, intraNodeThreadOffset, offset,
                                        core_pinning_stride, localityOrder, affinityAccess);
    }
    else
    {
        // Produce the warning if any rank fails.
        allAffinitiesSet = false;
    }
    if (invalidWithinSimulation(cr, !allAffinitiesSet) && !issuedWarning)
    {
        GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: Thread affinity was not set.");
    }
}
/* Detects and returns whether we have the default affinity mask
 *
 * Returns true when we can query thread affinities and CPU count is
 * consistent and we have default affinity mask on all ranks.
 *
 * Should be called simultaneously by all MPI ranks.
 */
static bool detectDefaultAffinityMask(const int nthreads_hw_avail)
{
    bool detectedDefaultAffinityMask = true;

#if HAVE_SCHED_AFFINITY
    cpu_set_t mask_current;
    CPU_ZERO(&mask_current);
    int ret;
    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
    {
        /* failed to query affinity mask, will just return */
        if (debug)
        {
            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
        }
        detectedDefaultAffinityMask = false;
    }

    /* Before proceeding with the actual check, make sure that the number of
     * detected CPUs is >= the CPUs in the current set.
     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#    ifdef CPU_COUNT
    if (detectedDefaultAffinityMask && nthreads_hw_avail < CPU_COUNT(&mask_current))
    {
        if (debug)
        {
            fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
                    nthreads_hw_avail, CPU_COUNT(&mask_current));
        }
        detectedDefaultAffinityMask = false;
    }
#    endif /* CPU_COUNT */

    if (detectedDefaultAffinityMask)
    {
        /* Here we check whether all bits of the affinity mask are set.
         * Note that this mask can change while the program is executing,
         * e.g. by the system dynamically enabling or disabling cores or
         * by some scheduler changing the affinities. Thus we can not assume
         * that the result is the same on all ranks within a node
         * when running this code at different times.
         */
        bool allBitsAreSet = true;
        for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
        {
            allBitsAreSet = allBitsAreSet && (CPU_ISSET(i, &mask_current) != 0);
        }
        if (debug)
        {
            fprintf(debug, "%s affinity mask found\n", allBitsAreSet ? "Default" : "Non-default");
        }
        if (!allBitsAreSet)
        {
            detectedDefaultAffinityMask = false;
        }
    }
#else
    GMX_UNUSED_VALUE(nthreads_hw_avail);
#endif /* HAVE_SCHED_AFFINITY */

#if GMX_MPI
    int mpiIsInitialized;
    MPI_Initialized(&mpiIsInitialized);
    if (mpiIsInitialized)
    {
        bool maskToReduce = detectedDefaultAffinityMask;
        MPI_Allreduce(&maskToReduce, &detectedDefaultAffinityMask, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
    }
#endif

    return detectedDefaultAffinityMask;
}
/* Check the process affinity mask and, if it is found to be non-default,
 * honor it and disable mdrun internal affinity setting.
 * Note that this will only work on Linux as we use a GNU feature.
 */
void gmx_check_thread_affinity_set(const gmx::MDLogger& mdlog,
                                   gmx_hw_opt_t*        hw_opt,
                                   int gmx_unused nthreads_hw_avail,
                                   gmx_bool       bAfterOpenmpInit)
{
    GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");

    if (!bAfterOpenmpInit)
    {
        /* Check for externally set OpenMP affinity and turn off internal
         * pinning if any is found. We need to do this check early to tell
         * thread-MPI whether it should do pinning when spawning threads.
         * TODO: the above no longer holds, we should move these checks later
         */
        if (hw_opt->threadAffinity != ThreadAffinity::Off)
        {
            char* message;
            if (!gmx_omp_check_thread_affinity(&message))
            {
                /* We only pin automatically with totNumThreadsIsAuto=true */
                if (hw_opt->threadAffinity == ThreadAffinity::On || hw_opt->totNumThreadsIsAuto)
                {
                    GMX_LOG(mdlog.warning).asParagraph().appendText(message);
                }
                sfree(message);
                hw_opt->threadAffinity = ThreadAffinity::Off;
            }
        }
    }

    if (!detectDefaultAffinityMask(nthreads_hw_avail))
    {
        if (hw_opt->threadAffinity == ThreadAffinity::Auto)
        {
            if (!bAfterOpenmpInit)
            {
                GMX_LOG(mdlog.warning)
                        .asParagraph()
                        .appendText(
                                "Non-default thread affinity set, disabling internal thread "
                                "affinity");
            }
            else
            {
                GMX_LOG(mdlog.warning)
                        .asParagraph()
                        .appendText(
                                "Non-default thread affinity set probably by the OpenMP library,\n"
                                "disabling internal thread affinity");
            }
            hw_opt->threadAffinity = ThreadAffinity::Off;
        }
        else
        {
            /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
            if (bAfterOpenmpInit)
            {
                GMX_LOG(mdlog.warning)
                        .asParagraph()
                        .appendTextFormatted("Overriding thread affinity set outside %s",
                                             gmx::getProgramContext().displayName());
            }
        }
    }
}