2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
46 #include "hardwaretopology.h"
59 #include "gromacs/hardware/cpuinfo.h"
60 #include "gromacs/utility/gmxassert.h"
63 # include <unistd.h> // sysconf()
65 #if GMX_NATIVE_WINDOWS
66 # include <windows.h> // GetSystemInfo()
69 //! Convenience macro to help us avoid ifdefs each time we use sysconf
70 #if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN)
71 # define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
80 /*****************************************************************************
82 * Utility functions for extracting hardware topology from CpuInfo object *
84 *****************************************************************************/
86 /*! \brief Initialize machine data from basic information in cpuinfo
88 * \param machine Machine tree structure where information will be assigned
89 * if the cpuinfo object contains topology information.
90 * \param supportLevel If topology information is available in CpuInfo,
91 * this will be updated to reflect the amount of
92 * information written to the machine structure.
95 parseCpuInfo(HardwareTopology::Machine
* machine
,
96 HardwareTopology::SupportLevel
* supportLevel
)
98 CpuInfo
cpuInfo(CpuInfo::detect());
100 if (!cpuInfo
.logicalProcessors().empty())
106 // Copy the logical processor information from cpuinfo
107 for (auto &l
: cpuInfo
.logicalProcessors())
109 machine
->logicalProcessors
.push_back( { l
.socketRankInMachine
, l
.coreRankInSocket
, l
.hwThreadRankInCore
, -1 } );
110 nSockets
= std::max(nSockets
, l
.socketRankInMachine
);
111 nCores
= std::max(nCores
, l
.coreRankInSocket
);
112 nHwThreads
= std::max(nHwThreads
, l
.hwThreadRankInCore
);
115 // Fill info form sockets/cores/hwthreads
120 machine
->sockets
.resize(nSockets
+ 1);
121 for (auto &s
: machine
->sockets
)
124 s
.cores
.resize(nCores
+ 1);
125 for (auto &c
: s
.cores
)
128 c
.numaNodeId
= -1; // No numa information
129 c
.hwThreads
.resize(nHwThreads
+ 1);
130 for (auto &t
: c
.hwThreads
)
133 t
.logicalProcessorId
= -1; // set as unassigned for now
138 // Fill the logical processor id in the right place
139 for (std::size_t i
= 0; i
< machine
->logicalProcessors
.size(); i
++)
141 const HardwareTopology::LogicalProcessor
&l
= machine
->logicalProcessors
[i
];
142 machine
->sockets
[l
.socketRankInMachine
].cores
[l
.coreRankInSocket
].hwThreads
[l
.hwThreadRankInCore
].logicalProcessorId
= static_cast<int>(i
);
144 machine
->logicalProcessorCount
= machine
->logicalProcessors
.size();
145 *supportLevel
= HardwareTopology::SupportLevel::Basic
;
149 *supportLevel
= HardwareTopology::SupportLevel::None
;
155 #if HWLOC_API_VERSION < 0x00010b00
156 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
157 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
160 /*****************************************************************************
162 * Utility functions for extracting hardware topology from hwloc library *
164 *****************************************************************************/
166 /*! \brief Return vector of all descendants of a given type in hwloc topology
168 * \param obj Non-null hwloc object.
169 * \param type hwloc object type to find. The routine will only search
170 * on levels below obj.
172 * \return vector containing all the objects of given type that are
173 * descendants of the provided object. If no objects of this type
174 * were found, the vector will be empty.
176 const std::vector
<hwloc_obj_t
>
177 getHwLocDescendantsByType(const hwloc_obj_t obj
, const hwloc_obj_type_t type
)
179 GMX_RELEASE_ASSERT(obj
, "NULL hwloc object provided to getHwLocDescendantsByType()");
181 std::vector
<hwloc_obj_t
> v
;
183 // Go through children; if this object has no children obj->arity is 0,
184 // and we'll return an empty vector.
185 for (std::size_t i
= 0; i
< obj
->arity
; i
++)
187 // If the child is the type we're looking for, add it directly.
188 // Otherwise call this routine recursively for each child.
189 if (obj
->children
[i
]->type
== type
)
191 v
.push_back(obj
->children
[i
]);
195 std::vector
<hwloc_obj_t
> v2
= getHwLocDescendantsByType(obj
->children
[i
], type
);
196 v
.insert(v
.end(), v2
.begin(), v2
.end());
202 /*! \brief Read information about sockets, cores and threads from hwloc topology
204 * \param topo hwloc topology handle that has been initialized and loaded
205 * \param machine Pointer to the machine structure in the HardwareTopology
206 * class, where the tree of sockets/cores/threads will be written.
208 * \return If all the data is found the return value is 0, otherwise non-zero.
211 parseHwLocSocketsCoresThreads(const hwloc_topology_t topo
,
212 HardwareTopology::Machine
* machine
)
214 const hwloc_obj_t root
= hwloc_get_root_obj(topo
);
215 std::vector
<hwloc_obj_t
> hwlocSockets
= getHwLocDescendantsByType(root
, HWLOC_OBJ_PACKAGE
);
217 machine
->logicalProcessorCount
= hwloc_get_nbobjs_by_type(topo
, HWLOC_OBJ_PU
);
218 machine
->logicalProcessors
.resize(machine
->logicalProcessorCount
);
219 machine
->sockets
.resize(hwlocSockets
.size());
221 bool topologyOk
= !hwlocSockets
.empty(); // Fail if we have no sockets in machine
223 for (std::size_t i
= 0; i
< hwlocSockets
.size() && topologyOk
; i
++)
225 // Assign information about this socket
226 machine
->sockets
[i
].id
= hwlocSockets
[i
]->logical_index
;
228 // Get children (cores)
229 std::vector
<hwloc_obj_t
> hwlocCores
= getHwLocDescendantsByType(hwlocSockets
[i
], HWLOC_OBJ_CORE
);
230 machine
->sockets
[i
].cores
.resize(hwlocCores
.size());
232 topologyOk
= topologyOk
&& !hwlocCores
.empty(); // Fail if we have no cores in socket
234 // Loop over child cores
235 for (std::size_t j
= 0; j
< hwlocCores
.size() && topologyOk
; j
++)
237 // Assign information about this core
238 machine
->sockets
[i
].cores
[j
].id
= hwlocCores
[j
]->logical_index
;
239 machine
->sockets
[i
].cores
[j
].numaNodeId
= -1;
241 // Get children (hwthreads)
242 std::vector
<hwloc_obj_t
> hwlocPUs
= getHwLocDescendantsByType(hwlocCores
[j
], HWLOC_OBJ_PU
);
243 machine
->sockets
[i
].cores
[j
].hwThreads
.resize(hwlocPUs
.size());
245 topologyOk
= topologyOk
&& !hwlocPUs
.empty(); // Fail if we have no hwthreads in core
247 // Loop over child hwthreads
248 for (std::size_t k
= 0; k
< hwlocPUs
.size() && topologyOk
; k
++)
250 // Assign information about this hwthread
251 std::size_t logicalProcessorId
= hwlocPUs
[k
]->os_index
;
252 machine
->sockets
[i
].cores
[j
].hwThreads
[k
].id
= hwlocPUs
[k
]->logical_index
;
253 machine
->sockets
[i
].cores
[j
].hwThreads
[k
].logicalProcessorId
= logicalProcessorId
;
255 if (logicalProcessorId
< machine
->logicalProcessors
.size())
257 // Cross-assign data for this hwthread to the logicalprocess vector
258 machine
->logicalProcessors
[logicalProcessorId
].socketRankInMachine
= static_cast<int>(i
);
259 machine
->logicalProcessors
[logicalProcessorId
].coreRankInSocket
= static_cast<int>(j
);
260 machine
->logicalProcessors
[logicalProcessorId
].hwThreadRankInCore
= static_cast<int>(k
);
261 machine
->logicalProcessors
[logicalProcessorId
].numaNodeId
= -1;
277 machine
->logicalProcessors
.clear();
278 machine
->sockets
.clear();
283 /*! \brief Read cache information from hwloc topology
285 * \param topo hwloc topology handle that has been initialized and loaded
286 * \param machine Pointer to the machine structure in the HardwareTopology
287 * class, where cache data will be filled.
289 * \return If any cache data is found the return value is 0, otherwise non-zero.
292 parseHwLocCache(const hwloc_topology_t topo
,
293 HardwareTopology::Machine
* machine
)
295 // Parse caches up to L5
296 for (int cachelevel
: { 1, 2, 3, 4, 5})
298 int depth
= hwloc_get_cache_type_depth(topo
, cachelevel
, HWLOC_OBJ_CACHE_DATA
);
302 hwloc_obj_t cache
= hwloc_get_next_obj_by_depth(topo
, depth
, nullptr);
303 if (cache
!= nullptr)
305 std::vector
<hwloc_obj_t
> hwThreads
= getHwLocDescendantsByType(cache
, HWLOC_OBJ_PU
);
307 machine
->caches
.push_back( {
308 static_cast<int>(cache
->attr
->cache
.depth
),
309 static_cast<std::size_t>(cache
->attr
->cache
.size
),
310 static_cast<int>(cache
->attr
->cache
.linesize
),
311 static_cast<int>(cache
->attr
->cache
.associativity
),
312 std::max(static_cast<int>(hwThreads
.size()), 1)
317 return machine
->caches
.empty();
321 /*! \brief Read numa information from hwloc topology
323 * \param topo hwloc topology handle that has been initialized and loaded
324 * \param machine Pointer to the machine structure in the HardwareTopology
325 * class, where numa information will be filled.
327 * Hwloc should virtually always be able to detect numa information, but if
328 * there is only a single numa node in the system it is not reported at all.
329 * In this case we create a single numa node covering all cores.
331 * This function uses the basic socket/core/thread information detected by
332 * parseHwLocSocketsCoresThreads(), which means that routine must have
333 * completed successfully before calling this one. If this is not the case,
334 * you will get an error return code.
336 * \return If the data found makes sense (either in the numa node or the
337 * entire machine) the return value is 0, otherwise non-zero.
340 parseHwLocNuma(const hwloc_topology_t topo
,
341 HardwareTopology::Machine
* machine
)
343 const hwloc_obj_t root
= hwloc_get_root_obj(topo
);
344 std::vector
<hwloc_obj_t
> hwlocNumaNodes
= getHwLocDescendantsByType(root
, HWLOC_OBJ_NUMANODE
);
345 bool topologyOk
= true;
347 if (!hwlocNumaNodes
.empty())
349 machine
->numa
.nodes
.resize(hwlocNumaNodes
.size());
351 for (std::size_t i
= 0; i
< hwlocNumaNodes
.size(); i
++)
353 machine
->numa
.nodes
[i
].id
= hwlocNumaNodes
[i
]->logical_index
;
354 machine
->numa
.nodes
[i
].memory
= hwlocNumaNodes
[i
]->memory
.total_memory
;
355 machine
->numa
.nodes
[i
].logicalProcessorId
.clear();
357 // Get list of PUs in this numa node
358 std::vector
<hwloc_obj_t
> hwlocPUs
= getHwLocDescendantsByType(hwlocNumaNodes
[i
], HWLOC_OBJ_PU
);
360 for (auto &p
: hwlocPUs
)
362 machine
->numa
.nodes
[i
].logicalProcessorId
.push_back(p
->os_index
);
364 GMX_RELEASE_ASSERT(p
->os_index
< machine
->logicalProcessors
.size(), "OS index of PU in hwloc larger than processor count");
366 machine
->logicalProcessors
[p
->os_index
].numaNodeId
= static_cast<int>(i
);
367 std::size_t s
= machine
->logicalProcessors
[p
->os_index
].socketRankInMachine
;
368 std::size_t c
= machine
->logicalProcessors
[p
->os_index
].coreRankInSocket
;
370 GMX_RELEASE_ASSERT(s
< machine
->sockets
.size(), "Socket index in logicalProcessors larger than socket count");
371 GMX_RELEASE_ASSERT(c
< machine
->sockets
[s
].cores
.size(), "Core index in logicalProcessors larger than core count");
372 // Set numaNodeId in core too
373 machine
->sockets
[s
].cores
[c
].numaNodeId
= i
;
377 int depth
= hwloc_get_type_depth(topo
, HWLOC_OBJ_NUMANODE
);
378 const struct hwloc_distances_s
* dist
= hwloc_get_whole_distance_matrix_by_depth(topo
, depth
);
379 if (dist
!= nullptr && dist
->nbobjs
== hwlocNumaNodes
.size())
381 machine
->numa
.baseLatency
= dist
->latency_base
;
382 machine
->numa
.maxRelativeLatency
= dist
->latency_max
;
383 machine
->numa
.relativeLatency
.resize(dist
->nbobjs
);
384 for (std::size_t i
= 0; i
< dist
->nbobjs
; i
++)
386 machine
->numa
.relativeLatency
[i
].resize(dist
->nbobjs
);
387 for (std::size_t j
= 0; j
< dist
->nbobjs
; j
++)
389 machine
->numa
.relativeLatency
[i
][j
] = dist
->latency
[i
*dist
->nbobjs
+j
];
400 // No numa nodes found. Use the entire machine as a numa node.
401 const hwloc_obj_t hwlocMachine
= hwloc_get_next_obj_by_type(topo
, HWLOC_OBJ_MACHINE
, nullptr);
403 if (hwlocMachine
!= nullptr)
405 machine
->numa
.nodes
.resize(1);
406 machine
->numa
.nodes
[0].id
= 0;
407 machine
->numa
.nodes
[0].memory
= hwlocMachine
->memory
.total_memory
;
408 machine
->numa
.baseLatency
= 10;
409 machine
->numa
.maxRelativeLatency
= 1;
410 machine
->numa
.relativeLatency
= { { 1.0 } };
412 for (int i
= 0; i
< machine
->logicalProcessorCount
; i
++)
414 machine
->numa
.nodes
[0].logicalProcessorId
.push_back(i
);
416 for (auto &l
: machine
->logicalProcessors
)
420 for (auto &s
: machine
->sockets
)
422 for (auto &c
: s
.cores
)
440 machine
->numa
.nodes
.clear();
446 /*! \brief Read PCI device information from hwloc topology
448 * \param topo hwloc topology handle that has been initialized and loaded
449 * \param machine Pointer to the machine structure in the HardwareTopology
450 * class, where PCI device information will be filled.
452 * \return If any devices were found the return value is 0, otherwise non-zero.
455 parseHwLocDevices(const hwloc_topology_t topo
,
456 HardwareTopology::Machine
* machine
)
458 const hwloc_obj_t root
= hwloc_get_root_obj(topo
);
459 std::vector
<hwloc_obj_t
> pcidevs
= getHwLocDescendantsByType(root
, HWLOC_OBJ_PCI_DEVICE
);
461 for (auto &p
: pcidevs
)
463 const hwloc_obj_t ancestor
= hwloc_get_ancestor_obj_by_type(topo
, HWLOC_OBJ_NUMANODE
, p
);
465 if (ancestor
!= nullptr)
467 numaId
= ancestor
->logical_index
;
471 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
472 numaId
= (machine
->numa
.nodes
.size() == 1) ? 0 : -1;
475 GMX_RELEASE_ASSERT(p
->attr
, "Attributes should not be NULL for hwloc PCI object");
477 machine
->devices
.push_back( {
478 p
->attr
->pcidev
.vendor_id
,
479 p
->attr
->pcidev
.device_id
,
480 p
->attr
->pcidev
.class_id
,
481 p
->attr
->pcidev
.domain
,
484 p
->attr
->pcidev
.func
,
488 return pcidevs
.empty();
492 parseHwLoc(HardwareTopology::Machine
* machine
,
493 HardwareTopology::SupportLevel
* supportLevel
,
496 hwloc_topology_t topo
;
498 // Initialize a hwloc object, set flags to request IO device information too,
499 // try to load the topology, and get the root object. If either step fails,
500 // return that we do not have any support at all from hwloc.
501 if (hwloc_topology_init(&topo
) != 0)
503 hwloc_topology_destroy(topo
);
504 return; // SupportLevel::None.
507 hwloc_topology_set_flags(topo
, HWLOC_TOPOLOGY_FLAG_IO_DEVICES
);
509 if (hwloc_topology_load(topo
) != 0 || hwloc_get_root_obj(topo
) == nullptr)
511 hwloc_topology_destroy(topo
);
512 return; // SupportLevel::None.
515 // If we get here, we can get a valid root object for the topology
516 *isThisSystem
= hwloc_topology_is_thissystem(topo
);
518 // Parse basic information about sockets, cores, and hardware threads
519 if (parseHwLocSocketsCoresThreads(topo
, machine
) == 0)
521 *supportLevel
= HardwareTopology::SupportLevel::Basic
;
525 hwloc_topology_destroy(topo
);
526 return; // SupportLevel::None.
529 // Get information about cache and numa nodes
530 if (parseHwLocCache(topo
, machine
) == 0 && parseHwLocNuma(topo
, machine
) == 0)
532 *supportLevel
= HardwareTopology::SupportLevel::Full
;
536 hwloc_topology_destroy(topo
);
537 return; // SupportLevel::Basic.
541 if (parseHwLocDevices(topo
, machine
) == 0)
543 *supportLevel
= HardwareTopology::SupportLevel::FullWithDevices
;
546 hwloc_topology_destroy(topo
);
547 return; // SupportLevel::Full or SupportLevel::FullWithDevices.
/*! \brief Try to detect the number of logical processors.
 *
 *  Uses GetSystemInfo() on native Windows and sysconf(_SC_NPROCESSORS_ONLN)
 *  where that is available; on any other platform nothing is detected.
 *
 *  \return The number of hardware processing units, or 0 if it fails.
 */
int
detectLogicalProcessorCount()
{
    int count = 0;

#if GMX_NATIVE_WINDOWS
    // Windows
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );
    count = static_cast<int>(sysinfo.dwNumberOfProcessors);
#elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    // We are probably on Unix. sysconf() reports errors as -1, which must
    // not leak out as a processor count, so map anything non-positive to 0.
    const long onlineCount = sysconf(_SC_NPROCESSORS_ONLN);
    count = (onlineCount > 0) ? static_cast<int>(onlineCount) : 0;
#endif
    // Neither Windows nor Unix: count stays 0.

    return count;
}
578 } // namespace anonymous
581 HardwareTopology
HardwareTopology::detect()
583 HardwareTopology result
;
586 parseHwLoc(&result
.machine_
, &result
.supportLevel_
, &result
.isThisSystem_
);
589 // If something went wrong in hwloc (or if it was not present) we might
590 // have more information in cpuInfo
591 if (result
.supportLevel_
< SupportLevel::Basic
)
593 // There might be topology information in cpuInfo
594 parseCpuInfo(&result
.machine_
, &result
.supportLevel_
);
596 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
597 if (result
.supportLevel_
== SupportLevel::None
)
599 // No topology information; try to detect the number of logical processors at least
600 result
.machine_
.logicalProcessorCount
= detectLogicalProcessorCount();
601 if (result
.machine_
.logicalProcessorCount
> 0)
603 result
.supportLevel_
= SupportLevel::LogicalProcessorCount
;
609 HardwareTopology::Machine::Machine()
611 logicalProcessorCount
= 0;
612 numa
.baseLatency
= 0.0;
613 numa
.maxRelativeLatency
= 0.0;
617 HardwareTopology::HardwareTopology()
618 : supportLevel_(SupportLevel::None
),
624 HardwareTopology::HardwareTopology(int logicalProcessorCount
)
625 : supportLevel_(SupportLevel::None
),
629 if (logicalProcessorCount
> 0)
631 machine_
.logicalProcessorCount
= logicalProcessorCount
;
632 supportLevel_
= SupportLevel::LogicalProcessorCount
;
636 int HardwareTopology::numberOfCores() const
638 if (supportLevel() >= SupportLevel::Basic
)
640 // We assume all sockets have the same number of cores as socket 0.
641 // Since topology information is present, we can assume there is at least one socket.
642 return machine().sockets
.size() * machine().sockets
[0].cores
.size();
644 else if (supportLevel() >= SupportLevel::LogicalProcessorCount
)
646 return machine().logicalProcessorCount
;