Allow creating mock HardwareTopology objects
[gromacs.git] / src / gromacs / hardware / hardwaretopology.cpp
blob4501a86b3e9c5b3894232cd338dafc6a9b6ca7a5
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
44 #include "gmxpre.h"
46 #include "hardwaretopology.h"
48 #include "config.h"
50 #include <algorithm>
51 #include <vector>
53 #include <thread>
55 #if GMX_HWLOC
56 # include <hwloc.h>
57 #endif
59 #include "gromacs/hardware/cpuinfo.h"
60 #include "gromacs/utility/gmxassert.h"
62 #ifdef HAVE_UNISTD_H
63 # include <unistd.h> // sysconf()
64 #endif
65 #if GMX_NATIVE_WINDOWS
66 # include <windows.h> // GetSystemInfo()
67 #endif
69 namespace gmx
72 namespace
75 /*****************************************************************************
76 * *
77 * Utility functions for extracting hardware topology from CpuInfo object *
78 * *
79 *****************************************************************************/
81 /*! \brief Initialize machine data from basic information in cpuinfo
83 * \param machine Machine tree structure where information will be assigned
84 * if the cpuinfo object contains topology information.
85 * \param supportLevel If topology information is available in CpuInfo,
86 * this will be updated to reflect the amount of
87 * information written to the machine structure.
89 void
90 parseCpuInfo(HardwareTopology::Machine * machine,
91 HardwareTopology::SupportLevel * supportLevel)
93 CpuInfo cpuInfo(CpuInfo::detect());
95 if (!cpuInfo.logicalProcessors().empty())
97 int nSockets = 0;
98 int nCores = 0;
99 int nHwThreads = 0;
101 // Copy the logical processor information from cpuinfo
102 for (auto &l : cpuInfo.logicalProcessors())
104 machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
105 nSockets = std::max(nSockets, l.socketRankInMachine);
106 nCores = std::max(nCores, l.coreRankInSocket);
107 nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
110 // Fill info form sockets/cores/hwthreads
111 int socketId = 0;
112 int coreId = 0;
113 int hwThreadId = 0;
115 machine->sockets.resize(nSockets + 1);
116 for (auto &s : machine->sockets)
118 s.id = socketId++;
119 s.cores.resize(nCores + 1);
120 for (auto &c : s.cores)
122 c.id = coreId++;
123 c.numaNodeId = -1; // No numa information
124 c.hwThreads.resize(nHwThreads + 1);
125 for (auto &t : c.hwThreads)
127 t.id = hwThreadId++;
128 t.logicalProcessorId = -1; // set as unassigned for now
133 // Fill the logical processor id in the right place
134 for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
136 const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
137 machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
139 machine->logicalProcessorCount = machine->logicalProcessors.size();
140 *supportLevel = HardwareTopology::SupportLevel::Basic;
142 else
144 *supportLevel = HardwareTopology::SupportLevel::None;
148 #if GMX_HWLOC
150 #if HWLOC_API_VERSION < 0x00010b00
151 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
152 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
153 #endif
155 /*****************************************************************************
157 * Utility functions for extracting hardware topology from hwloc library *
159 *****************************************************************************/
161 /*! \brief Return vector of all descendants of a given type in hwloc topology
163 * \param obj Non-null hwloc object.
164 * \param type hwloc object type to find. The routine will only search
165 * on levels below obj.
167 * \return vector containing all the objects of given type that are
168 * descendants of the provided object. If no objects of this type
169 * were found, the vector will be empty.
171 const std::vector<hwloc_obj_t>
172 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
174 GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
176 std::vector<hwloc_obj_t> v;
178 // Go through children; if this object has no children obj->arity is 0,
179 // and we'll return an empty vector.
180 for (std::size_t i = 0; i < obj->arity; i++)
182 // If the child is the type we're looking for, add it directly.
183 // Otherwise call this routine recursively for each child.
184 if (obj->children[i]->type == type)
186 v.push_back(obj->children[i]);
188 else
190 std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
191 v.insert(v.end(), v2.begin(), v2.end());
194 return v;
197 /*! \brief Read information about sockets, cores and threads from hwloc topology
199 * \param topo hwloc topology handle that has been initialized and loaded
200 * \param machine Pointer to the machine structure in the HardwareTopology
201 * class, where the tree of sockets/cores/threads will be written.
203 * \return If all the data is found the return value is 0, otherwise non-zero.
206 parseHwLocSocketsCoresThreads(const hwloc_topology_t topo,
207 HardwareTopology::Machine * machine)
209 const hwloc_obj_t root = hwloc_get_root_obj(topo);
210 std::vector<hwloc_obj_t> hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
212 machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
213 machine->logicalProcessors.resize(machine->logicalProcessorCount);
214 machine->sockets.resize(hwlocSockets.size());
216 bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
218 for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
220 // Assign information about this socket
221 machine->sockets[i].id = hwlocSockets[i]->logical_index;
223 // Get children (cores)
224 std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
225 machine->sockets[i].cores.resize(hwlocCores.size());
227 topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
229 // Loop over child cores
230 for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
232 // Assign information about this core
233 machine->sockets[i].cores[j].id = hwlocCores[j]->logical_index;
234 machine->sockets[i].cores[j].numaNodeId = -1;
236 // Get children (hwthreads)
237 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
238 machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
240 topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
242 // Loop over child hwthreads
243 for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
245 // Assign information about this hwthread
246 std::size_t logicalProcessorId = hwlocPUs[k]->os_index;
247 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
248 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
250 if (logicalProcessorId < machine->logicalProcessors.size())
252 // Cross-assign data for this hwthread to the logicalprocess vector
253 machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
254 machine->logicalProcessors[logicalProcessorId].coreRankInSocket = static_cast<int>(j);
255 machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore = static_cast<int>(k);
256 machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
258 else
260 topologyOk = false;
266 if (topologyOk)
268 return 0;
270 else
272 machine->logicalProcessors.clear();
273 machine->sockets.clear();
274 return -1;
278 /*! \brief Read cache information from hwloc topology
280 * \param topo hwloc topology handle that has been initialized and loaded
281 * \param machine Pointer to the machine structure in the HardwareTopology
282 * class, where cache data will be filled.
284 * \return If any cache data is found the return value is 0, otherwise non-zero.
287 parseHwLocCache(const hwloc_topology_t topo,
288 HardwareTopology::Machine * machine)
290 // Parse caches up to L5
291 for (int cachelevel : { 1, 2, 3, 4, 5})
293 int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
295 if (depth >= 0)
297 hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
298 if (cache != NULL)
300 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
302 machine->caches.push_back( {
303 static_cast<int>(cache->attr->cache.depth),
304 static_cast<std::size_t>(cache->attr->cache.size),
305 static_cast<int>(cache->attr->cache.linesize),
306 static_cast<int>(cache->attr->cache.associativity),
307 std::max(static_cast<int>(hwThreads.size()), 1)
308 } );
312 return machine->caches.empty();
316 /*! \brief Read numa information from hwloc topology
318 * \param topo hwloc topology handle that has been initialized and loaded
319 * \param machine Pointer to the machine structure in the HardwareTopology
320 * class, where numa information will be filled.
322 * Hwloc should virtually always be able to detect numa information, but if
323 * there is only a single numa node in the system it is not reported at all.
324 * In this case we create a single numa node covering all cores.
326 * This function uses the basic socket/core/thread information detected by
327 * parseHwLocSocketsCoresThreads(), which means that routine must have
328 * completed successfully before calling this one. If this is not the case,
329 * you will get an error return code.
331 * \return If the data found makes sense (either in the numa node or the
332 * entire machine) the return value is 0, otherwise non-zero.
335 parseHwLocNuma(const hwloc_topology_t topo,
336 HardwareTopology::Machine * machine)
338 const hwloc_obj_t root = hwloc_get_root_obj(topo);
339 std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
340 bool topologyOk = true;
342 if (!hwlocNumaNodes.empty())
344 machine->numa.nodes.resize(hwlocNumaNodes.size());
346 for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
348 machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index;
349 machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
350 machine->numa.nodes[i].logicalProcessorId.clear();
352 // Get list of PUs in this numa node
353 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
355 for (auto &p : hwlocPUs)
357 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
359 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
361 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
362 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
363 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
365 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
366 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
367 // Set numaNodeId in core too
368 machine->sockets[s].cores[c].numaNodeId = i;
372 int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
373 const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
374 if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
376 machine->numa.baseLatency = dist->latency_base;
377 machine->numa.maxRelativeLatency = dist->latency_max;
378 machine->numa.relativeLatency.resize(dist->nbobjs);
379 for (std::size_t i = 0; i < dist->nbobjs; i++)
381 machine->numa.relativeLatency[i].resize(dist->nbobjs);
382 for (std::size_t j = 0; j < dist->nbobjs; j++)
384 machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
388 else
390 topologyOk = false;
393 else
395 // No numa nodes found. Use the entire machine as a numa node.
396 const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
398 if (hwlocMachine != NULL)
400 machine->numa.nodes.resize(1);
401 machine->numa.nodes[0].id = 0;
402 machine->numa.nodes[0].memory = hwlocMachine->memory.total_memory;
403 machine->numa.baseLatency = 10;
404 machine->numa.maxRelativeLatency = 1;
405 machine->numa.relativeLatency = { { 1.0 } };
407 for (int i = 0; i < machine->logicalProcessorCount; i++)
409 machine->numa.nodes[0].logicalProcessorId.push_back(i);
411 for (auto &l : machine->logicalProcessors)
413 l.numaNodeId = 0;
415 for (auto &s : machine->sockets)
417 for (auto &c : s.cores)
419 c.numaNodeId = 0;
423 else
425 topologyOk = false;
429 if (topologyOk)
431 return 0;
433 else
435 machine->numa.nodes.clear();
436 return -1;
441 /*! \brief Read PCI device information from hwloc topology
443 * \param topo hwloc topology handle that has been initialized and loaded
444 * \param machine Pointer to the machine structure in the HardwareTopology
445 * class, where PCI device information will be filled.
447 * \return If any devices were found the return value is 0, otherwise non-zero.
450 parseHwLocDevices(const hwloc_topology_t topo,
451 HardwareTopology::Machine * machine)
453 const hwloc_obj_t root = hwloc_get_root_obj(topo);
454 std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
456 for (auto &p : pcidevs)
458 const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
459 int numaId;
460 if (ancestor != NULL)
462 numaId = ancestor->logical_index;
464 else
466 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
467 numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
470 GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
472 machine->devices.push_back( {
473 p->attr->pcidev.vendor_id,
474 p->attr->pcidev.device_id,
475 p->attr->pcidev.class_id,
476 p->attr->pcidev.domain,
477 p->attr->pcidev.bus,
478 p->attr->pcidev.dev,
479 p->attr->pcidev.func,
480 numaId
481 } );
483 return pcidevs.empty();
486 void
487 parseHwLoc(HardwareTopology::Machine * machine,
488 HardwareTopology::SupportLevel * supportLevel)
490 hwloc_topology_t topo;
492 // Initialize a hwloc object, set flags to request IO device information too,
493 // try to load the topology, and get the root object. If either step fails,
494 // return that we do not have any support at all from hwloc.
495 if (hwloc_topology_init(&topo) != 0)
497 hwloc_topology_destroy(topo);
498 return; // SupportLevel::None.
501 hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
503 if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
505 hwloc_topology_destroy(topo);
506 return; // SupportLevel::None.
508 // If we get here, we can get a valid root object for the topology
510 // Parse basic information about sockets, cores, and hardware threads
511 if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
513 *supportLevel = HardwareTopology::SupportLevel::Basic;
515 else
517 hwloc_topology_destroy(topo);
518 return; // SupportLevel::None.
521 // Get information about cache and numa nodes
522 if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
524 *supportLevel = HardwareTopology::SupportLevel::Full;
526 else
528 hwloc_topology_destroy(topo);
529 return; // SupportLevel::Basic.
532 // PCI devices
533 if (parseHwLocDevices(topo, machine) == 0)
535 *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
538 hwloc_topology_destroy(topo);
539 return; // SupportLevel::Full or SupportLevel::FullWithDevices.
542 #endif
544 /*! \brief Try to detect the number of logical processors.
546 * \return The number of hardware processing units, or 0 if it fails.
549 detectLogicalProcessorCount()
551 // Try to use std::thread::hardware_concurrency() first. This result is only
552 // a hint, and it might be 0 if the information is not available.
553 // On Apple this will not compile with gcc-4.6, and since it just returns 0 on other
554 // platforms too we skip it entirely for gcc < 4.7
555 #if defined __GNUC__ && (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
556 int count = 0;
557 #else
558 int count = std::thread::hardware_concurrency();
559 #endif
561 if (count == 0)
563 #if GMX_NATIVE_WINDOWS
564 // Windows
565 SYSTEM_INFO sysinfo;
566 GetSystemInfo( &sysinfo );
567 count = sysinfo.dwNumberOfProcessors;
568 #elif defined HAVE_SYSCONF
569 // We are probably on Unix. Check if we have the argument to use before executing the call
570 # if defined(_SC_NPROCESSORS_CONF)
571 count = sysconf(_SC_NPROCESSORS_CONF);
572 # elif defined(_SC_NPROC_CONF)
573 count = sysconf(_SC_NPROC_CONF);
574 # elif defined(_SC_NPROCESSORS_ONLN)
575 count = sysconf(_SC_NPROCESSORS_ONLN);
576 # elif defined(_SC_NPROC_ONLN)
577 count = sysconf(_SC_NPROC_ONLN);
578 # else
579 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
580 # endif // End of check for sysconf argument values
582 #else
583 count = 0; // Neither windows nor Unix, and std::thread_hardware_concurrency() failed.
584 #endif
586 return count;
589 } // namespace anonymous
591 // static
592 HardwareTopology HardwareTopology::detect()
594 HardwareTopology result;
596 result.supportLevel_ = SupportLevel::None;
598 #if GMX_HWLOC
599 parseHwLoc(&result.machine_, &result.supportLevel_);
600 #endif
602 // If something went wrong in hwloc (or if it was not present) we might
603 // have more information in cpuInfo
604 if (result.supportLevel_ < SupportLevel::Basic)
606 // There might be topology information in cpuInfo
607 parseCpuInfo(&result.machine_, &result.supportLevel_);
609 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
610 if (result.supportLevel_ == SupportLevel::None)
612 // No topology information; try to detect the number of logical processors at least
613 result.machine_.logicalProcessorCount = detectLogicalProcessorCount();
614 if (result.machine_.logicalProcessorCount > 0)
616 result.supportLevel_ = SupportLevel::LogicalProcessorCount;
619 return result;
622 HardwareTopology::Machine::Machine()
624 logicalProcessorCount = 0;
625 numa.baseLatency = 0.0;
626 numa.maxRelativeLatency = 0.0;
630 HardwareTopology::HardwareTopology()
631 : supportLevel_(SupportLevel::None)
635 HardwareTopology::HardwareTopology(int logicalProcessorCount)
636 : supportLevel_(SupportLevel::None)
638 if (logicalProcessorCount > 0)
640 machine_.logicalProcessorCount = logicalProcessorCount;
641 supportLevel_ = SupportLevel::LogicalProcessorCount;
645 int HardwareTopology::numberOfCores() const
647 if (supportLevel() >= SupportLevel::Basic)
649 // We assume all sockets have the same number of cores as socket 0.
650 // Since topology information is present, we can assume there is at least one socket.
651 return machine().sockets.size() * machine().sockets[0].cores.size();
653 else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
655 return machine().logicalProcessorCount;
657 else
659 return 0;
663 } // namespace gmx