Move consistency check for number of threads
[gromacs/AngularHB.git] / src / gromacs / hardware / hardwaretopology.cpp
blobc23ff0f3063bf5abfc487c36a84b4ca3298b8944
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
44 #include "gmxpre.h"
46 #include "hardwaretopology.h"
48 #include "config.h"
50 #include <cstdio>
52 #include <algorithm>
53 #include <vector>
55 #if GMX_HWLOC
56 # include <hwloc.h>
57 #endif
59 #include "gromacs/gmxlib/md_logging.h"
60 #include "gromacs/hardware/cpuinfo.h"
61 #include "gromacs/utility/gmxassert.h"
63 #ifdef HAVE_UNISTD_H
64 # include <unistd.h> // sysconf()
65 #endif
66 #if GMX_NATIVE_WINDOWS
67 # include <windows.h> // GetSystemInfo()
68 #endif
70 #if defined(_M_ARM) || defined(__arm__) || defined(__ARM_ARCH) || defined (__aarch64__)
71 //! Constant used to help minimize preprocessed code
72 static const bool isArm = true;
73 #else
74 //! Constant used to help minimize preprocessed code
75 static const bool isArm = false;
76 #endif
78 namespace gmx
81 namespace
84 /*****************************************************************************
85 * *
86 * Utility functions for extracting hardware topology from CpuInfo object *
87 * *
88 *****************************************************************************/
90 /*! \brief Initialize machine data from basic information in cpuinfo
92 * \param machine Machine tree structure where information will be assigned
93 * if the cpuinfo object contains topology information.
94 * \param supportLevel If topology information is available in CpuInfo,
95 * this will be updated to reflect the amount of
96 * information written to the machine structure.
98 void
99 parseCpuInfo(HardwareTopology::Machine * machine,
100 HardwareTopology::SupportLevel * supportLevel)
102 CpuInfo cpuInfo(CpuInfo::detect());
104 if (!cpuInfo.logicalProcessors().empty())
106 int nSockets = 0;
107 int nCores = 0;
108 int nHwThreads = 0;
110 // Copy the logical processor information from cpuinfo
111 for (auto &l : cpuInfo.logicalProcessors())
113 machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
114 nSockets = std::max(nSockets, l.socketRankInMachine);
115 nCores = std::max(nCores, l.coreRankInSocket);
116 nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
119 // Fill info form sockets/cores/hwthreads
120 int socketId = 0;
121 int coreId = 0;
122 int hwThreadId = 0;
124 machine->sockets.resize(nSockets + 1);
125 for (auto &s : machine->sockets)
127 s.id = socketId++;
128 s.cores.resize(nCores + 1);
129 for (auto &c : s.cores)
131 c.id = coreId++;
132 c.numaNodeId = -1; // No numa information
133 c.hwThreads.resize(nHwThreads + 1);
134 for (auto &t : c.hwThreads)
136 t.id = hwThreadId++;
137 t.logicalProcessorId = -1; // set as unassigned for now
142 // Fill the logical processor id in the right place
143 for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
145 const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
146 machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
148 machine->logicalProcessorCount = machine->logicalProcessors.size();
149 *supportLevel = HardwareTopology::SupportLevel::Basic;
151 else
153 *supportLevel = HardwareTopology::SupportLevel::None;
157 #if GMX_HWLOC
159 #if HWLOC_API_VERSION < 0x00010b00
160 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
161 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
162 #endif
164 /*****************************************************************************
166 * Utility functions for extracting hardware topology from hwloc library *
168 *****************************************************************************/
170 /*! \brief Return vector of all descendants of a given type in hwloc topology
172 * \param obj Non-null hwloc object.
173 * \param type hwloc object type to find. The routine will only search
174 * on levels below obj.
176 * \return vector containing all the objects of given type that are
177 * descendants of the provided object. If no objects of this type
178 * were found, the vector will be empty.
180 const std::vector<hwloc_obj_t>
181 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
183 GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
185 std::vector<hwloc_obj_t> v;
187 // Go through children; if this object has no children obj->arity is 0,
188 // and we'll return an empty vector.
189 for (std::size_t i = 0; i < obj->arity; i++)
191 // If the child is the type we're looking for, add it directly.
192 // Otherwise call this routine recursively for each child.
193 if (obj->children[i]->type == type)
195 v.push_back(obj->children[i]);
197 else
199 std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
200 v.insert(v.end(), v2.begin(), v2.end());
203 return v;
206 /*! \brief Read information about sockets, cores and threads from hwloc topology
208 * \param topo hwloc topology handle that has been initialized and loaded
209 * \param machine Pointer to the machine structure in the HardwareTopology
210 * class, where the tree of sockets/cores/threads will be written.
212 * \return If all the data is found the return value is 0, otherwise non-zero.
215 parseHwLocSocketsCoresThreads(const hwloc_topology_t topo,
216 HardwareTopology::Machine * machine)
218 const hwloc_obj_t root = hwloc_get_root_obj(topo);
219 std::vector<hwloc_obj_t> hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
221 machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
222 machine->logicalProcessors.resize(machine->logicalProcessorCount);
223 machine->sockets.resize(hwlocSockets.size());
225 bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
227 for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
229 // Assign information about this socket
230 machine->sockets[i].id = hwlocSockets[i]->logical_index;
232 // Get children (cores)
233 std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
234 machine->sockets[i].cores.resize(hwlocCores.size());
236 topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
238 // Loop over child cores
239 for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
241 // Assign information about this core
242 machine->sockets[i].cores[j].id = hwlocCores[j]->logical_index;
243 machine->sockets[i].cores[j].numaNodeId = -1;
245 // Get children (hwthreads)
246 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
247 machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
249 topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
251 // Loop over child hwthreads
252 for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
254 // Assign information about this hwthread
255 std::size_t logicalProcessorId = hwlocPUs[k]->os_index;
256 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
257 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
259 if (logicalProcessorId < machine->logicalProcessors.size())
261 // Cross-assign data for this hwthread to the logicalprocess vector
262 machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
263 machine->logicalProcessors[logicalProcessorId].coreRankInSocket = static_cast<int>(j);
264 machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore = static_cast<int>(k);
265 machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
267 else
269 topologyOk = false;
275 if (topologyOk)
277 return 0;
279 else
281 machine->logicalProcessors.clear();
282 machine->sockets.clear();
283 return -1;
287 /*! \brief Read cache information from hwloc topology
289 * \param topo hwloc topology handle that has been initialized and loaded
290 * \param machine Pointer to the machine structure in the HardwareTopology
291 * class, where cache data will be filled.
293 * \return If any cache data is found the return value is 0, otherwise non-zero.
296 parseHwLocCache(const hwloc_topology_t topo,
297 HardwareTopology::Machine * machine)
299 // Parse caches up to L5
300 for (int cachelevel : { 1, 2, 3, 4, 5})
302 int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
304 if (depth >= 0)
306 hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
307 if (cache != NULL)
309 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
311 machine->caches.push_back( {
312 static_cast<int>(cache->attr->cache.depth),
313 static_cast<std::size_t>(cache->attr->cache.size),
314 static_cast<int>(cache->attr->cache.linesize),
315 static_cast<int>(cache->attr->cache.associativity),
316 std::max(static_cast<int>(hwThreads.size()), 1)
317 } );
321 return machine->caches.empty();
325 /*! \brief Read numa information from hwloc topology
327 * \param topo hwloc topology handle that has been initialized and loaded
328 * \param machine Pointer to the machine structure in the HardwareTopology
329 * class, where numa information will be filled.
331 * Hwloc should virtually always be able to detect numa information, but if
332 * there is only a single numa node in the system it is not reported at all.
333 * In this case we create a single numa node covering all cores.
335 * This function uses the basic socket/core/thread information detected by
336 * parseHwLocSocketsCoresThreads(), which means that routine must have
337 * completed successfully before calling this one. If this is not the case,
338 * you will get an error return code.
340 * \return If the data found makes sense (either in the numa node or the
341 * entire machine) the return value is 0, otherwise non-zero.
344 parseHwLocNuma(const hwloc_topology_t topo,
345 HardwareTopology::Machine * machine)
347 const hwloc_obj_t root = hwloc_get_root_obj(topo);
348 std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
349 bool topologyOk = true;
351 if (!hwlocNumaNodes.empty())
353 machine->numa.nodes.resize(hwlocNumaNodes.size());
355 for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
357 machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index;
358 machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
359 machine->numa.nodes[i].logicalProcessorId.clear();
361 // Get list of PUs in this numa node
362 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
364 for (auto &p : hwlocPUs)
366 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
368 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
370 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
371 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
372 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
374 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
375 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
376 // Set numaNodeId in core too
377 machine->sockets[s].cores[c].numaNodeId = i;
381 int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
382 const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
383 if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
385 machine->numa.baseLatency = dist->latency_base;
386 machine->numa.maxRelativeLatency = dist->latency_max;
387 machine->numa.relativeLatency.resize(dist->nbobjs);
388 for (std::size_t i = 0; i < dist->nbobjs; i++)
390 machine->numa.relativeLatency[i].resize(dist->nbobjs);
391 for (std::size_t j = 0; j < dist->nbobjs; j++)
393 machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
397 else
399 topologyOk = false;
402 else
404 // No numa nodes found. Use the entire machine as a numa node.
405 const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
407 if (hwlocMachine != NULL)
409 machine->numa.nodes.resize(1);
410 machine->numa.nodes[0].id = 0;
411 machine->numa.nodes[0].memory = hwlocMachine->memory.total_memory;
412 machine->numa.baseLatency = 10;
413 machine->numa.maxRelativeLatency = 1;
414 machine->numa.relativeLatency = { { 1.0 } };
416 for (int i = 0; i < machine->logicalProcessorCount; i++)
418 machine->numa.nodes[0].logicalProcessorId.push_back(i);
420 for (auto &l : machine->logicalProcessors)
422 l.numaNodeId = 0;
424 for (auto &s : machine->sockets)
426 for (auto &c : s.cores)
428 c.numaNodeId = 0;
432 else
434 topologyOk = false;
438 if (topologyOk)
440 return 0;
442 else
444 machine->numa.nodes.clear();
445 return -1;
450 /*! \brief Read PCI device information from hwloc topology
452 * \param topo hwloc topology handle that has been initialized and loaded
453 * \param machine Pointer to the machine structure in the HardwareTopology
454 * class, where PCI device information will be filled.
456 * \return If any devices were found the return value is 0, otherwise non-zero.
459 parseHwLocDevices(const hwloc_topology_t topo,
460 HardwareTopology::Machine * machine)
462 const hwloc_obj_t root = hwloc_get_root_obj(topo);
463 std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
465 for (auto &p : pcidevs)
467 const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
468 int numaId;
469 if (ancestor != NULL)
471 numaId = ancestor->logical_index;
473 else
475 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
476 numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
479 GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
481 machine->devices.push_back( {
482 p->attr->pcidev.vendor_id,
483 p->attr->pcidev.device_id,
484 p->attr->pcidev.class_id,
485 p->attr->pcidev.domain,
486 p->attr->pcidev.bus,
487 p->attr->pcidev.dev,
488 p->attr->pcidev.func,
489 numaId
490 } );
492 return pcidevs.empty();
495 void
496 parseHwLoc(HardwareTopology::Machine * machine,
497 HardwareTopology::SupportLevel * supportLevel,
498 bool * isThisSystem)
500 hwloc_topology_t topo;
502 // Initialize a hwloc object, set flags to request IO device information too,
503 // try to load the topology, and get the root object. If either step fails,
504 // return that we do not have any support at all from hwloc.
505 if (hwloc_topology_init(&topo) != 0)
507 hwloc_topology_destroy(topo);
508 return; // SupportLevel::None.
511 hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
513 if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
515 hwloc_topology_destroy(topo);
516 return; // SupportLevel::None.
519 // If we get here, we can get a valid root object for the topology
520 *isThisSystem = hwloc_topology_is_thissystem(topo);
522 // Parse basic information about sockets, cores, and hardware threads
523 if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
525 *supportLevel = HardwareTopology::SupportLevel::Basic;
527 else
529 hwloc_topology_destroy(topo);
530 return; // SupportLevel::None.
533 // Get information about cache and numa nodes
534 if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
536 *supportLevel = HardwareTopology::SupportLevel::Full;
538 else
540 hwloc_topology_destroy(topo);
541 return; // SupportLevel::Basic.
544 // PCI devices
545 if (parseHwLocDevices(topo, machine) == 0)
547 *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
550 hwloc_topology_destroy(topo);
551 return; // SupportLevel::Full or SupportLevel::FullWithDevices.
554 #endif
556 /*! \brief Try to detect the number of logical processors.
558 * \return The number of hardware processing units, or 0 if it fails.
561 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
563 int count = 0;
566 #if GMX_NATIVE_WINDOWS
567 // Windows
568 SYSTEM_INFO sysinfo;
569 GetSystemInfo( &sysinfo );
570 count = sysinfo.dwNumberOfProcessors;
571 #elif defined HAVE_SYSCONF
572 // We are probably on Unix. Check if we have the argument to use before executing any calls
573 # if defined(_SC_NPROCESSORS_CONF)
574 count = sysconf(_SC_NPROCESSORS_CONF);
575 # if defined(_SC_NPROCESSORS_ONLN)
576 /* On e.g. Arm, the Linux kernel can use advanced power saving features where
577 * processors are brought online/offline dynamically. This will cause
578 * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
579 * reason we now warn if this mismatches with the detected core count. */
580 int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
581 if (count != countOnline)
583 /* We assume that this scenario means that the kernel has
584 disabled threads or cores, and that the only safe course is
585 to assume that _SC_NPROCESSORS_ONLN should be used. Even
586 this may not be valid if running in a containerized
587 environment, such system calls may read from
588 /sys/devices/system/cpu and report what the OS sees, rather
589 than what the container cgroup is supposed to set up as
590 limits. But we're not sure right now whether there's any
591 (standard-ish) way to handle that.
593 On ARM, the kernel may have powered down the cores,
594 which we'll warn the user about now. On x86, this
595 means HT is disabled by the kernel, not in the
596 BIOS. We're not sure what it means on other
597 architectures, or even if it is possible, because
598 sysconf is rather non-standardized. */
599 if (isArm)
601 md_print_warn(cr, fplog,
602 "%d CPUs configured, but only %d of them are online.\n"
603 "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
604 "off to save power, and will turn them back on later when the load increases.\n"
605 "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
606 "will likely see much better performance by forcing all cores to be online, and\n"
607 "making sure they run at their full clock frequency.", count, countOnline);
609 else
611 md_print_warn(cr, fplog,
612 "Note: %d CPUs configured, but only %d of them are online, so GROMACS will use the latter.",
613 count, countOnline);
614 // We use the online count to avoid (potential) oversubscription.
615 count = countOnline;
618 # endif
619 # elif defined(_SC_NPROC_CONF)
620 count = sysconf(_SC_NPROC_CONF);
621 # elif defined(_SC_NPROCESSORS_ONLN)
622 count = sysconf(_SC_NPROCESSORS_ONLN);
623 # elif defined(_SC_NPROC_ONLN)
624 count = sysconf(_SC_NPROC_ONLN);
625 # else
626 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
627 # endif // End of check for sysconf argument values
629 #else
630 count = 0; // Neither windows nor Unix.
631 #endif
634 GMX_UNUSED_VALUE(cr);
635 GMX_UNUSED_VALUE(fplog);
636 return count;
639 } // namespace anonymous
641 // static
642 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
644 HardwareTopology result;
646 // Default values for machine and numa stuff
647 result.machine_.logicalProcessorCount = 0;
648 result.machine_.numa.baseLatency = 0.0;
649 result.machine_.numa.maxRelativeLatency = 0.0;
650 result.supportLevel_ = SupportLevel::None;
651 result.isThisSystem_ = true;
653 #if GMX_HWLOC
654 parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
655 #endif
657 // If something went wrong in hwloc (or if it was not present) we might
658 // have more information in cpuInfo
659 if (result.supportLevel_ < SupportLevel::Basic)
661 // There might be topology information in cpuInfo
662 parseCpuInfo(&result.machine_, &result.supportLevel_);
664 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
665 if (result.supportLevel_ == SupportLevel::None)
667 // No topology information; try to detect the number of logical processors at least
668 result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
669 if (result.machine_.logicalProcessorCount > 0)
671 result.supportLevel_ = SupportLevel::LogicalProcessorCount;
674 return result;
678 HardwareTopology::HardwareTopology()
679 : supportLevel_(SupportLevel::None)
683 int HardwareTopology::numberOfCores() const
685 if (supportLevel() >= SupportLevel::Basic)
687 // We assume all sockets have the same number of cores as socket 0.
688 // Since topology information is present, we can assume there is at least one socket.
689 return machine().sockets.size() * machine().sockets[0].cores.size();
691 else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
693 return machine().logicalProcessorCount;
695 else
697 return 0;
701 } // namespace gmx