src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <vector>
  54
  55 #if GMX_HWLOC
  56 #    include <hwloc.h>
  57 #endif
  58
  59 #include "gromacs/gmxlib/md_logging.h"
  60 #include "gromacs/hardware/cpuinfo.h"
  61 #include "gromacs/utility/gmxassert.h"
  62
  63 #ifdef HAVE_UNISTD_H
  64 #    include <unistd.h>       // sysconf()
  65 #endif
  66 #if GMX_NATIVE_WINDOWS
  67 #    include <windows.h>      // GetSystemInfo()
  68 #endif
  69
  70 #if defined(_M_ARM) || defined(__arm__) || defined(__ARM_ARCH) || defined (__aarch64__)
  71 //! Constant used to help minimize preprocessed code
  72 static const bool isArm = true;
  73 #else
  74 //! Constant used to help minimize preprocessed code
  75 static const bool isArm = false;
  76 #endif
  77
  78 namespace gmx
  79 {
  80
  81 namespace
  82 {
  83
  84 /*****************************************************************************
  85  *                                                                           *
  86  *   Utility functions for extracting hardware topology from CpuInfo object  *
  87  *                                                                           *
  88  *****************************************************************************/
  89
  90 /*! \brief Initialize machine data from basic information in cpuinfo
  91  *
  92  *  \param  machine      Machine tree structure where information will be assigned
  93  *                       if the cpuinfo object contains topology information.
  94  *  \param  supportLevel If topology information is available in CpuInfo,
  95  *                       this will be updated to reflect the amount of
  96  *                       information written to the machine structure.
  97  */
  98 void
  99 parseCpuInfo(HardwareTopology::Machine *        machine,
 100              HardwareTopology::SupportLevel *   supportLevel)
 101 {
 102     CpuInfo cpuInfo(CpuInfo::detect());
 103
 104     if (!cpuInfo.logicalProcessors().empty())
 105     {
 106         int nSockets   = 0;
 107         int nCores     = 0;
 108         int nHwThreads = 0;
 109
 110         // Copy the logical processor information from cpuinfo
 111         for (auto &l : cpuInfo.logicalProcessors())
 112         {
 113             machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
 114             nSockets   = std::max(nSockets, l.socketRankInMachine);
 115             nCores     = std::max(nCores, l.coreRankInSocket);
 116             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 117         }
 118
 119         // Fill info form sockets/cores/hwthreads
 120         int socketId   = 0;
 121         int coreId     = 0;
 122         int hwThreadId = 0;
 123
 124         machine->sockets.resize(nSockets + 1);
 125         for (auto &s : machine->sockets)
 126         {
 127             s.id = socketId++;
 128             s.cores.resize(nCores + 1);
 129             for (auto &c : s.cores)
 130             {
 131                 c.id         = coreId++;
 132                 c.numaNodeId = -1; // No numa information
 133                 c.hwThreads.resize(nHwThreads + 1);
 134                 for (auto &t : c.hwThreads)
 135                 {
 136                     t.id                 = hwThreadId++;
 137                     t.logicalProcessorId = -1; // set as unassigned for now
 138                 }
 139             }
 140         }
 141
 142         // Fill the logical processor id in the right place
 143         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 144         {
 145             const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
 146             machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
 147         }
 148         machine->logicalProcessorCount = machine->logicalProcessors.size();
 149         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 150     }
 151     else
 152     {
 153         *supportLevel = HardwareTopology::SupportLevel::None;
 154     }
 155 }
 156
 157 #if GMX_HWLOC
 158
 159 #if HWLOC_API_VERSION < 0x00010b00
 160 #    define HWLOC_OBJ_PACKAGE  HWLOC_OBJ_SOCKET
 161 #    define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 162 #endif
 163
 164 /*****************************************************************************
 165  *                                                                           *
 166  *   Utility functions for extracting hardware topology from hwloc library   *
 167  *                                                                           *
 168  *****************************************************************************/
 169
 170 /*! \brief Return vector of all descendants of a given type in hwloc topology
 171  *
 172  *  \param obj   Non-null hwloc object.
 173  *  \param type  hwloc object type to find. The routine will only search
 174  *               on levels below obj.
 175  *
 176  *  \return vector containing all the objects of given type that are
 177  *          descendants of the provided object. If no objects of this type
 178  *          were found, the vector will be empty.
 179  */
 180 const std::vector<hwloc_obj_t>
 181 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
 182 {
 183     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 184
 185     std::vector<hwloc_obj_t> v;
 186
 187     // Go through children; if this object has no children obj->arity is 0,
 188     // and we'll return an empty vector.
 189     for (std::size_t i = 0; i < obj->arity; i++)
 190     {
 191         // If the child is the type we're looking for, add it directly.
 192         // Otherwise call this routine recursively for each child.
 193         if (obj->children[i]->type == type)
 194         {
 195             v.push_back(obj->children[i]);
 196         }
 197         else
 198         {
 199             std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
 200             v.insert(v.end(), v2.begin(), v2.end());
 201         }
 202     }
 203     return v;
 204 }
 205
 206 /*! \brief Read information about sockets, cores and threads from hwloc topology
 207  *
 208  *  \param topo    hwloc topology handle that has been initialized and loaded
 209  *  \param machine Pointer to the machine structure in the HardwareTopology
 210  *                 class, where the tree of sockets/cores/threads will be written.
 211  *
 212  *  \return If all the data is found the return value is 0, otherwise non-zero.
 213  */
 214 int
 215 parseHwLocSocketsCoresThreads(const hwloc_topology_t             topo,
 216                               HardwareTopology::Machine *        machine)
 217 {
 218     const hwloc_obj_t              root         = hwloc_get_root_obj(topo);
 219     std::vector<hwloc_obj_t>       hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
 220
 221     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 222     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 223     machine->sockets.resize(hwlocSockets.size());
 224
 225     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 226
 227     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 228     {
 229         // Assign information about this socket
 230         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 231
 232         // Get children (cores)
 233         std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
 234         machine->sockets[i].cores.resize(hwlocCores.size());
 235
 236         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 237
 238         // Loop over child cores
 239         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 240         {
 241             // Assign information about this core
 242             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 243             machine->sockets[i].cores[j].numaNodeId = -1;
 244
 245             // Get children (hwthreads)
 246             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
 247             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 248
 249             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 250
 251             // Loop over child hwthreads
 252             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 253             {
 254                 // Assign information about this hwthread
 255                 std::size_t logicalProcessorId                               = hwlocPUs[k]->os_index;
 256                 machine->sockets[i].cores[j].hwThreads[k].id                 = hwlocPUs[k]->logical_index;
 257                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 258
 259                 if (logicalProcessorId < machine->logicalProcessors.size())
 260                 {
 261                     // Cross-assign data for this hwthread to the logicalprocess vector
 262                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
 263                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket    = static_cast<int>(j);
 264                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore  = static_cast<int>(k);
 265                     machine->logicalProcessors[logicalProcessorId].numaNodeId          = -1;
 266                 }
 267                 else
 268                 {
 269                     topologyOk = false;
 270                 }
 271             }
 272         }
 273     }
 274
 275     if (topologyOk)
 276     {
 277         return 0;
 278     }
 279     else
 280     {
 281         machine->logicalProcessors.clear();
 282         machine->sockets.clear();
 283         return -1;
 284     }
 285 }
 286
 287 /*! \brief Read cache information from hwloc topology
 288  *
 289  *  \param topo    hwloc topology handle that has been initialized and loaded
 290  *  \param machine Pointer to the machine structure in the HardwareTopology
 291  *                 class, where cache data will be filled.
 292  *
 293  *  \return If any cache data is found the return value is 0, otherwise non-zero.
 294  */
 295 int
 296 parseHwLocCache(const hwloc_topology_t             topo,
 297                 HardwareTopology::Machine *        machine)
 298 {
 299     // Parse caches up to L5
 300     for (int cachelevel : { 1, 2, 3, 4, 5})
 301     {
 302         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 303
 304         if (depth >= 0)
 305         {
 306             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
 307             if (cache != NULL)
 308             {
 309                 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
 310
 311                 machine->caches.push_back( {
 312                                                static_cast<int>(cache->attr->cache.depth),
 313                                                static_cast<std::size_t>(cache->attr->cache.size),
 314                                                static_cast<int>(cache->attr->cache.linesize),
 315                                                static_cast<int>(cache->attr->cache.associativity),
 316                                                std::max(static_cast<int>(hwThreads.size()), 1)
 317                                            } );
 318             }
 319         }
 320     }
 321     return machine->caches.empty();
 322 }
 323
 324
 325 /*! \brief Read numa information from hwloc topology
 326  *
 327  *  \param topo    hwloc topology handle that has been initialized and loaded
 328  *  \param machine Pointer to the machine structure in the HardwareTopology
 329  *                 class, where numa information will be filled.
 330  *
 331  *  Hwloc should virtually always be able to detect numa information, but if
 332  *  there is only a single numa node in the system it is not reported at all.
 333  *  In this case we create a single numa node covering all cores.
 334  *
 335  *  This function uses the basic socket/core/thread information detected by
 336  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 337  *  completed successfully before calling this one. If this is not the case,
 338  *  you will get an error return code.
 339  *
 340  *  \return If the data found makes sense (either in the numa node or the
 341  *          entire machine) the return value is 0, otherwise non-zero.
 342  */
 343 int
 344 parseHwLocNuma(const hwloc_topology_t             topo,
 345                HardwareTopology::Machine *        machine)
 346 {
 347     const hwloc_obj_t        root           = hwloc_get_root_obj(topo);
 348     std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
 349     bool                     topologyOk     = true;
 350
 351     if (!hwlocNumaNodes.empty())
 352     {
 353         machine->numa.nodes.resize(hwlocNumaNodes.size());
 354
 355         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 356         {
 357             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 358             machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
 359             machine->numa.nodes[i].logicalProcessorId.clear();
 360
 361             // Get list of PUs in this numa node
 362             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
 363
 364             for (auto &p : hwlocPUs)
 365             {
 366                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 367
 368                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
 369
 370                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 371                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 372                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 373
 374                 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
 375                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
 376                 // Set numaNodeId in core too
 377                 machine->sockets[s].cores[c].numaNodeId = i;
 378             }
 379         }
 380
 381         int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 382         const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 383         if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
 384         {
 385             machine->numa.baseLatency        = dist->latency_base;
 386             machine->numa.maxRelativeLatency = dist->latency_max;
 387             machine->numa.relativeLatency.resize(dist->nbobjs);
 388             for (std::size_t i = 0; i < dist->nbobjs; i++)
 389             {
 390                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 391                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 392                 {
 393                     machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
 394                 }
 395             }
 396         }
 397         else
 398         {
 399             topologyOk = false;
 400         }
 401     }
 402     else
 403     {
 404         // No numa nodes found. Use the entire machine as a numa node.
 405         const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
 406
 407         if (hwlocMachine != NULL)
 408         {
 409             machine->numa.nodes.resize(1);
 410             machine->numa.nodes[0].id           = 0;
 411             machine->numa.nodes[0].memory       = hwlocMachine->memory.total_memory;
 412             machine->numa.baseLatency           = 10;
 413             machine->numa.maxRelativeLatency    = 1;
 414             machine->numa.relativeLatency       = { { 1.0 } };
 415
 416             for (int i = 0; i < machine->logicalProcessorCount; i++)
 417             {
 418                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 419             }
 420             for (auto &l : machine->logicalProcessors)
 421             {
 422                 l.numaNodeId = 0;
 423             }
 424             for (auto &s : machine->sockets)
 425             {
 426                 for (auto &c : s.cores)
 427                 {
 428                     c.numaNodeId = 0;
 429                 }
 430             }
 431         }
 432         else
 433         {
 434             topologyOk = false;
 435         }
 436     }
 437
 438     if (topologyOk)
 439     {
 440         return 0;
 441     }
 442     else
 443     {
 444         machine->numa.nodes.clear();
 445         return -1;
 446     }
 447
 448 }
 449
 450 /*! \brief Read PCI device information from hwloc topology
 451  *
 452  *  \param topo    hwloc topology handle that has been initialized and loaded
 453  *  \param machine Pointer to the machine structure in the HardwareTopology
 454  *                 class, where PCI device information will be filled.
 455  * *
 456  *  \return If any devices were found the return value is 0, otherwise non-zero.
 457  */
 458 int
 459 parseHwLocDevices(const hwloc_topology_t             topo,
 460                   HardwareTopology::Machine *        machine)
 461 {
 462     const hwloc_obj_t        root    = hwloc_get_root_obj(topo);
 463     std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
 464
 465     for (auto &p : pcidevs)
 466     {
 467         const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
 468         int               numaId;
 469         if (ancestor != NULL)
 470         {
 471             numaId = ancestor->logical_index;
 472         }
 473         else
 474         {
 475             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 476             numaId = (machine->numa.nodes.size() == 1) ?  0 : -1;
 477         }
 478
 479         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 480
 481         machine->devices.push_back( {
 482                                         p->attr->pcidev.vendor_id,
 483                                         p->attr->pcidev.device_id,
 484                                         p->attr->pcidev.class_id,
 485                                         p->attr->pcidev.domain,
 486                                         p->attr->pcidev.bus,
 487                                         p->attr->pcidev.dev,
 488                                         p->attr->pcidev.func,
 489                                         numaId
 490                                     } );
 491     }
 492     return pcidevs.empty();
 493 }
 494
 495 void
 496 parseHwLoc(HardwareTopology::Machine *        machine,
 497            HardwareTopology::SupportLevel *   supportLevel,
 498            bool *                             isThisSystem)
 499 {
 500     hwloc_topology_t    topo;
 501
 502     // Initialize a hwloc object, set flags to request IO device information too,
 503     // try to load the topology, and get the root object. If either step fails,
 504     // return that we do not have any support at all from hwloc.
 505     if (hwloc_topology_init(&topo) != 0)
 506     {
 507         hwloc_topology_destroy(topo);
 508         return; // SupportLevel::None.
 509     }
 510
 511     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 512
 513     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
 514     {
 515         hwloc_topology_destroy(topo);
 516         return; // SupportLevel::None.
 517     }
 518
 519     // If we get here, we can get a valid root object for the topology
 520     *isThisSystem = hwloc_topology_is_thissystem(topo);
 521
 522     // Parse basic information about sockets, cores, and hardware threads
 523     if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
 524     {
 525         *supportLevel = HardwareTopology::SupportLevel::Basic;
 526     }
 527     else
 528     {
 529         hwloc_topology_destroy(topo);
 530         return; // SupportLevel::None.
 531     }
 532
 533     // Get information about cache and numa nodes
 534     if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
 535     {
 536         *supportLevel = HardwareTopology::SupportLevel::Full;
 537     }
 538     else
 539     {
 540         hwloc_topology_destroy(topo);
 541         return; // SupportLevel::Basic.
 542     }
 543
 544     // PCI devices
 545     if (parseHwLocDevices(topo, machine) == 0)
 546     {
 547         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 548     }
 549
 550     hwloc_topology_destroy(topo);
 551     return; // SupportLevel::Full or SupportLevel::FullWithDevices.
 552 }
 553
 554 #endif
 555
 556 /*! \brief Try to detect the number of logical processors.
 557  *
 558  *  \return The number of hardware processing units, or 0 if it fails.
 559  */
 560 int
 561 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
 562 {
 563     int count = 0;
 564
 565     {
 566 #if GMX_NATIVE_WINDOWS
 567         // Windows
 568         SYSTEM_INFO sysinfo;
 569         GetSystemInfo( &sysinfo );
 570         count = sysinfo.dwNumberOfProcessors;
 571 #elif defined HAVE_SYSCONF
 572         // We are probably on Unix. Check if we have the argument to use before executing any calls
 573 #    if defined(_SC_NPROCESSORS_CONF)
 574         count = sysconf(_SC_NPROCESSORS_CONF);
 575 #        if defined(_SC_NPROCESSORS_ONLN)
 576         /* On e.g. Arm, the Linux kernel can use advanced power saving features where
 577          * processors are brought online/offline dynamically. This will cause
 578          * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
 579          * reason we now warn if this mismatches with the detected core count. */
 580         int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
 581         if (count != countOnline)
 582         {
 583             /* We assume that this scenario means that the kernel has
 584                disabled threads or cores, and that the only safe course is
 585                to assume that _SC_NPROCESSORS_ONLN should be used. Even
 586                this may not be valid if running in a containerized
 587                environment, such system calls may read from
 588                /sys/devices/system/cpu and report what the OS sees, rather
 589                than what the container cgroup is supposed to set up as
 590                limits. But we're not sure right now whether there's any
 591                (standard-ish) way to handle that.
 592
 593                On ARM, the kernel may have powered down the cores,
 594                which we'll warn the user about now. On x86, this
 595                means HT is disabled by the kernel, not in the
 596                BIOS. We're not sure what it means on other
 597                architectures, or even if it is possible, because
 598                sysconf is rather non-standardized. */
 599             if (isArm)
 600             {
 601                 md_print_warn(cr, fplog,
 602                               "%d CPUs configured, but only %d of them are online.\n"
 603                               "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
 604                               "off to save power, and will turn them back on later when the load increases.\n"
 605                               "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
 606                               "will likely see much better performance by forcing all cores to be online, and\n"
 607                               "making sure they run at their full clock frequency.", count, countOnline);
 608             }
 609             else
 610             {
 611                 md_print_warn(cr, fplog,
 612                               "Note: %d CPUs configured, but only %d of them are online, so GROMACS will use the latter.",
 613                               count, countOnline);
 614                 // We use the online count to avoid (potential) oversubscription.
 615                 count = countOnline;
 616             }
 617         }
 618 #        endif
 619 #    elif defined(_SC_NPROC_CONF)
 620         count = sysconf(_SC_NPROC_CONF);
 621 #    elif defined(_SC_NPROCESSORS_ONLN)
 622         count = sysconf(_SC_NPROCESSORS_ONLN);
 623 #    elif defined(_SC_NPROC_ONLN)
 624         count = sysconf(_SC_NPROC_ONLN);
 625 #    else
 626 #       warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
 627 #    endif      // End of check for sysconf argument values
 628
 629 #else
 630         count = 0; // Neither windows nor Unix.
 631 #endif
 632     }
 633
 634     GMX_UNUSED_VALUE(cr);
 635     GMX_UNUSED_VALUE(fplog);
 636     return count;
 637 }
 638
 639 }   // namespace anonymous
 640
 641 // static
 642 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
 643 {
 644     HardwareTopology result;
 645
 646     // Default values for machine and numa stuff
 647     result.machine_.logicalProcessorCount   = 0;
 648     result.machine_.numa.baseLatency        = 0.0;
 649     result.machine_.numa.maxRelativeLatency = 0.0;
 650     result.supportLevel_                    = SupportLevel::None;
 651     result.isThisSystem_                    = true;
 652
 653 #if GMX_HWLOC
 654     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 655 #endif
 656
 657     // If something went wrong in hwloc (or if it was not present) we might
 658     // have more information in cpuInfo
 659     if (result.supportLevel_ < SupportLevel::Basic)
 660     {
 661         // There might be topology information in cpuInfo
 662         parseCpuInfo(&result.machine_, &result.supportLevel_);
 663     }
 664     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 665     if (result.supportLevel_ == SupportLevel::None)
 666     {
 667         // No topology information; try to detect the number of logical processors at least
 668         result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
 669         if (result.machine_.logicalProcessorCount > 0)
 670         {
 671             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 672         }
 673     }
 674     return result;
 675 }
 676
 677
 678 HardwareTopology::HardwareTopology()
 679     : supportLevel_(SupportLevel::None)
 680 {
 681 }
 682
 683 int HardwareTopology::numberOfCores() const
 684 {
 685     if (supportLevel() >= SupportLevel::Basic)
 686     {
 687         // We assume all sockets have the same number of cores as socket 0.
 688         // Since topology information is present, we can assume there is at least one socket.
 689         return machine().sockets.size() * machine().sockets[0].cores.size();
 690     }
 691     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 692     {
 693         return machine().logicalProcessorCount;
 694     }
 695     else
 696     {
 697         return 0;
 698     }
 699 }
 700
 701 } // namespace gmx