/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "printhardware.h"

#include <cstdio>
#include <cstdlib>

#include <string>
#include <vector>

#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/device_management.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/hardware/identifyavx512fmaunits.h"
#include "gromacs/simd/support.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/sysinfo.h"
63 //! Constant used to help minimize preprocessed code
64 static constexpr bool bGPUBinary
= (GMX_GPU
!= 0);
67 * Returns the GPU information text, one GPU per line.
69 static std::string
sprint_gpus(const gmx_gpu_info_t
& gpu_info
)
72 std::vector
<std::string
> gpuStrings
;
73 for (int i
= 0; i
< gpu_info
.n_dev
; i
++)
75 get_gpu_device_info_string(stmp
, gpu_info
, i
);
76 gpuStrings
.push_back(gmx::formatString(" %s", stmp
));
78 return gmx::joinStrings(gpuStrings
, "\n");
81 /* Give a suitable fatal error or warning if the build configuration
82 and runtime CPU do not match. */
83 static void check_use_of_rdtscp_on_this_cpu(const gmx::MDLogger
& mdlog
, const gmx::CpuInfo
& cpuInfo
)
85 bool binaryUsesRdtscp
= GMX_USE_RDTSCP
;
87 const char* programName
= gmx::getProgramContext().displayName();
89 if (cpuInfo
.supportLevel() < gmx::CpuInfo::SupportLevel::Features
)
93 GMX_LOG(mdlog
.warning
)
96 "The %s executable was compiled to use the rdtscp CPU instruction. "
97 "We cannot detect the features of your current CPU, but will proceed "
99 "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake "
106 bool cpuHasRdtscp
= cpuInfo
.feature(gmx::CpuInfo::Feature::X86_Rdtscp
);
108 if (!cpuHasRdtscp
&& binaryUsesRdtscp
)
111 "The %s executable was compiled to use the rdtscp CPU instruction. "
112 "However, this is not supported by the current hardware and continuing would "
114 "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
118 if (cpuHasRdtscp
&& !binaryUsesRdtscp
)
120 GMX_LOG(mdlog
.warning
)
122 .appendTextFormatted(
123 "The current CPU can measure timings more accurately than the code in\n"
124 "%s was configured to use. This might affect your simulation\n"
125 "speed as accurate timings are needed for load-balancing.\n"
126 "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake "
128 programName
, programName
);
133 static std::string
detected_hardware_string(const gmx_hw_info_t
* hwinfo
, bool bFullCpuInfo
)
137 const gmx::CpuInfo
& cpuInfo
= *hwinfo
->cpuInfo
;
138 const gmx::HardwareTopology
& hwTop
= *hwinfo
->hardwareTopology
;
140 s
= gmx::formatString("\n");
141 s
+= gmx::formatString("Running on %d node%s with total", hwinfo
->nphysicalnode
,
142 hwinfo
->nphysicalnode
== 1 ? "" : "s");
143 if (hwinfo
->ncore_tot
> 0)
145 s
+= gmx::formatString(" %d cores,", hwinfo
->ncore_tot
);
147 s
+= gmx::formatString(" %d logical cores", hwinfo
->nhwthread_tot
);
148 if (hwinfo
->gpu_info
.bDetectGPUs
)
150 s
+= gmx::formatString(", %d compatible GPU%s", hwinfo
->ngpu_compatible_tot
,
151 hwinfo
->ngpu_compatible_tot
== 1 ? "" : "s");
155 s
+= gmx::formatString(" (GPU detection deactivated)");
157 s
+= gmx::formatString("\n");
159 if (hwinfo
->nphysicalnode
> 1)
161 /* Print per node hardware feature counts */
162 if (hwinfo
->ncore_max
> 0)
164 s
+= gmx::formatString(" Cores per node: %2d", hwinfo
->ncore_min
);
165 if (hwinfo
->ncore_max
> hwinfo
->ncore_min
)
167 s
+= gmx::formatString(" - %2d", hwinfo
->ncore_max
);
169 s
+= gmx::formatString("\n");
171 s
+= gmx::formatString(" Logical cores per node: %2d", hwinfo
->nhwthread_min
);
172 if (hwinfo
->nhwthread_max
> hwinfo
->nhwthread_min
)
174 s
+= gmx::formatString(" - %2d", hwinfo
->nhwthread_max
);
176 s
+= gmx::formatString("\n");
179 s
+= gmx::formatString(" Compatible GPUs per node: %2d", hwinfo
->ngpu_compatible_min
);
180 if (hwinfo
->ngpu_compatible_max
> hwinfo
->ngpu_compatible_min
)
182 s
+= gmx::formatString(" - %2d", hwinfo
->ngpu_compatible_max
);
184 s
+= gmx::formatString("\n");
185 if (hwinfo
->ngpu_compatible_tot
> 0)
187 if (hwinfo
->bIdenticalGPUs
)
189 s
+= gmx::formatString(" All nodes have identical type(s) of GPUs\n");
193 /* This message will also appear with identical GPU types
194 * when at least one node has no GPU.
196 s
+= gmx::formatString(
197 " Different nodes have different type(s) and/or order of GPUs\n");
207 gmx_gethostname(host
, STRLEN
);
209 MPI_Comm_rank(MPI_COMM_WORLD
, &rank
);
211 // TODO Use a wrapper around MPI_Get_processor_name instead.
212 s
+= gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n", host
, rank
);
214 s
+= gmx::formatString("Hardware detected:\n");
216 s
+= gmx::formatString(" CPU info:\n");
218 s
+= gmx::formatString(" Vendor: %s\n", cpuInfo
.vendorString().c_str());
220 s
+= gmx::formatString(" Brand: %s\n", cpuInfo
.brandString().c_str());
224 s
+= gmx::formatString(" Family: %d Model: %d Stepping: %d\n", cpuInfo
.family(),
225 cpuInfo
.model(), cpuInfo
.stepping());
227 s
+= gmx::formatString(" Features:");
228 for (auto& f
: cpuInfo
.featureSet())
230 s
+= gmx::formatString(" %s", gmx::CpuInfo::featureString(f
).c_str());
232 s
+= gmx::formatString("\n");
235 if (cpuInfo
.feature(gmx::CpuInfo::Feature::X86_Avx512F
))
237 int avx512fmaunits
= gmx::identifyAvx512FmaUnits();
238 s
+= gmx::formatString(" Number of AVX-512 FMA units:");
239 if (avx512fmaunits
> 0)
241 s
+= gmx::formatString(" %d", avx512fmaunits
);
242 if (avx512fmaunits
== 1)
244 s
+= gmx::formatString(" (AVX2 is faster w/o 2 AVX-512 FMA units)");
249 s
+= gmx::formatString(" Cannot run AVX-512 detection - assuming 2");
251 s
+= gmx::formatString("\n");
254 s
+= gmx::formatString(" Hardware topology: ");
255 switch (hwTop
.supportLevel())
257 case gmx::HardwareTopology::SupportLevel::None
: s
+= gmx::formatString("None\n"); break;
258 case gmx::HardwareTopology::SupportLevel::LogicalProcessorCount
:
259 s
+= gmx::formatString("Only logical processor count\n");
261 case gmx::HardwareTopology::SupportLevel::Basic
: s
+= gmx::formatString("Basic\n"); break;
262 case gmx::HardwareTopology::SupportLevel::Full
: s
+= gmx::formatString("Full\n"); break;
263 case gmx::HardwareTopology::SupportLevel::FullWithDevices
:
264 s
+= gmx::formatString("Full, with devices\n");
268 if (!hwTop
.isThisSystem())
270 s
+= gmx::formatString(" NOTE: Hardware topology cached or synthetic, not detected.\n");
271 if (char* p
= std::getenv("HWLOC_XMLFILE"))
273 s
+= gmx::formatString(" HWLOC_XMLFILE=%s\n", p
);
279 if (hwTop
.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic
)
281 s
+= gmx::formatString(" Sockets, cores, and logical processors:\n");
283 for (auto& socket
: hwTop
.machine().sockets
)
285 s
+= gmx::formatString(" Socket %2d:", socket
.id
);
286 for (auto& c
: socket
.cores
)
288 s
+= gmx::formatString(" [");
289 for (auto& t
: c
.hwThreads
)
291 s
+= gmx::formatString(" %3d", t
.logicalProcessorId
);
293 s
+= gmx::formatString("]");
295 s
+= gmx::formatString("\n");
298 if (hwTop
.supportLevel() >= gmx::HardwareTopology::SupportLevel::Full
)
300 s
+= gmx::formatString(" Numa nodes:\n");
301 for (auto& n
: hwTop
.machine().numa
.nodes
)
303 s
+= gmx::formatString(" Node %2d (%zu bytes mem):", n
.id
, n
.memory
);
304 for (auto& l
: n
.logicalProcessorId
)
306 s
+= gmx::formatString(" %3d", l
);
308 s
+= gmx::formatString("\n");
310 s
+= gmx::formatString(" Latency:\n ");
311 for (std::size_t j
= 0; j
< hwTop
.machine().numa
.nodes
.size(); j
++)
313 s
+= gmx::formatString(" %5zu", j
);
315 s
+= gmx::formatString("\n");
316 for (std::size_t i
= 0; i
< hwTop
.machine().numa
.nodes
.size(); i
++)
318 s
+= gmx::formatString(" %5zu", i
);
319 for (std::size_t j
= 0; j
< hwTop
.machine().numa
.nodes
.size(); j
++)
321 s
+= gmx::formatString(" %5.2f", hwTop
.machine().numa
.relativeLatency
[i
][j
]);
323 s
+= gmx::formatString("\n");
327 s
+= gmx::formatString(" Caches:\n");
328 for (auto& c
: hwTop
.machine().caches
)
330 s
+= gmx::formatString(
331 " L%d: %zu bytes, linesize %d bytes, assoc. %d, shared %d ways\n",
332 c
.level
, c
.size
, c
.linesize
, c
.associativity
, c
.shared
);
335 if (hwTop
.supportLevel() >= gmx::HardwareTopology::SupportLevel::FullWithDevices
)
337 s
+= gmx::formatString(" PCI devices:\n");
338 for (auto& d
: hwTop
.machine().devices
)
340 s
+= gmx::formatString(
341 " %04x:%02x:%02x.%1x Id: %04x:%04x Class: 0x%04x Numa: %d\n", d
.domain
,
342 d
.bus
, d
.dev
, d
.func
, d
.vendorId
, d
.deviceId
, d
.classId
, d
.numaNodeId
);
347 if (bGPUBinary
&& hwinfo
->gpu_info
.n_dev
> 0)
349 s
+= gmx::formatString(" GPU info:\n");
350 s
+= gmx::formatString(" Number of GPUs detected: %d\n", hwinfo
->gpu_info
.n_dev
);
351 s
+= sprint_gpus(hwinfo
->gpu_info
) + "\n";
356 void gmx_print_detected_hardware(FILE* fplog
,
357 const bool warnToStdErr
,
358 const gmx::MDLogger
& mdlog
,
359 const gmx_hw_info_t
* hwinfo
)
361 const gmx::CpuInfo
& cpuInfo
= *hwinfo
->cpuInfo
;
363 if (fplog
!= nullptr)
365 std::string detected
;
367 detected
= detected_hardware_string(hwinfo
, TRUE
);
369 fprintf(fplog
, "%s\n", detected
.c_str());
372 // Do not spam stderr with all our internal information unless
373 // there was something that actually went wrong; general information
374 // belongs in the logfile.
376 /* Check the compiled SIMD instruction set against that of the node
377 * with the lowest SIMD level support (skip if SIMD detection did not work)
379 if (cpuInfo
.supportLevel() >= gmx::CpuInfo::SupportLevel::Features
)
381 gmx::simdCheck(static_cast<gmx::SimdType
>(hwinfo
->simd_suggest_min
), fplog
, warnToStdErr
);
384 /* For RDTSCP we only check on our local node and skip the MPI reduction */
385 check_use_of_rdtscp_on_this_cpu(mdlog
, cpuInfo
);