src/gromacs/hardware/cpuinfo.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  * \brief
  37  * Declares gmx::CpuInfo
  38  *
  39  * \author Erik Lindahl <erik.lindahl@gmail.com>
  40  * \inlibraryapi
  41  * \ingroup module_hardware
  42  */
  43 #ifndef GMX_HARDWARE_CPUINFO_H
  44 #define GMX_HARDWARE_CPUINFO_H
  45
  46 #include <map>
  47 #include <set>
  48 #include <string>
  49 #include <vector>
  50
  51 namespace gmx
  52 {
  53
  54 /*! \libinternal \brief Detect CPU capabilities and basic logical processor info
  55  *
  56  *  This class provides a lot of information about x86 CPUs, and some very
  57  *  limited information about other hardware. The logical processor information
  58  *  is only available on x86, and is used as a fallback implementation in
  59  *  the HardwareTopology class.
  60  *  If you actually need information about the hardware topology, use the much
  61  *  more general implementation in the HardwareTopology class instead, since
  62  *  that will both be more portable and contain more information.
  63  *
  64  * \ingroup module_hardware
  65  */
  66 class CpuInfo
  67 {
  68
  69     public:
  70
  71         /*! \brief Amount of cpu information present (incremental) */
  72         enum class SupportLevel
  73         {
  74             None,                  //!< No cpu information whatsoever. Sorry.
  75             Name,                  //!< Only vendor and/or brand is set
  76             Features,              //!< Some features are set
  77             LogicalProcessorInfo   //!< Everything including logical processor information
  78         };
  79
  80         /*! \brief Processor/system vendors */
  81         enum class Vendor
  82         {
  83             Unknown,      //!< Unidentified
  84             Intel,        //!< GenuineIntel
  85             Amd,          //!< AuthenticAMD
  86             Fujitsu,      //!< Only works on Linux (parsed from /proc/cpuinfo)
  87             Ibm,          //!< Only works on Linux (parsed from /proc/cpuinfo)
  88             Arm,          //!< Only works on Linux (parsed from /proc/cpuinfo)
  89         };
  90
  91         /*! \brief List of CPU features
  92          *
  93          *  These values can be used as arguments to the feature() method
  94          *  to check whether a specific feature was found on the CPU we are
  95          *  running on.
  96          */
  97         enum class Feature
  98         {
  99             X86_Aes,         //!< x86 advanced encryption standard accel.
 100             X86_Apic,        //!< APIC support
 101             X86_Avx,         //!< Advanced vector extensions
 102             X86_Avx2,        //!< AVX2 including gather support (not used yet)
 103             X86_Avx512F,     //!< Foundation AVX-512 instructions
 104             X86_Avx512PF,    //!< Extended gather/scatter for AVX-512
 105             X86_Avx512ER,    //!< AVX-512 exponential and reciprocal extensions
 106             X86_Avx512CD,    //!< Memory conflict-detection for AVX-512
 107             X86_Avx512BW,    //!< AVX-512 byte and word instructions
 108             X86_Avx512VL,    //!< AVX-512 vector length extensions
 109             X86_Clfsh,       //!< Supports CLFLUSH instruction
 110             X86_Cmov,        //!< Conditional move insn support
 111             X86_Cx8,         //!< Supports CMPXCHG8B (8-byte compare-exchange)
 112             X86_Cx16,        //!< Supports CMPXCHG16B (16-byte compare-exchg)
 113             X86_F16C,        //!< Supports 16-bit FP conversion instructions
 114             X86_Fma,         //!< Fused-multiply add support (mainly for AVX)
 115             X86_Fma4,        //!< 4-operand FMA, only on AMD for now
 116             X86_Hle,         //!< Hardware lock elision
 117             X86_Htt,         //!< Hyper-Threading supported (but maybe not enabled)
 118             X86_Lahf,        //!< LAHF/SAHF support in 64 bits
 119             X86_MisalignSse, //!< Support for misaligned SSE data instructions
 120             X86_Mmx,         //!< MMX registers and instructions
 121             X86_Msr,         //!< Supports Intel model-specific-registers
 122             X86_NonstopTsc,  //!< Invariant TSC (constant rate in ACPI states)
 123             X86_Pcid,        //!< Process context identifier support
 124             X86_Pclmuldq,    //!< Carry-less 64-bit multiplication supported
 125             X86_Pdcm,        //!< Perfmon and Debug Capability
 126             X86_PDPE1GB,     //!< Support for 1GB pages
 127             X86_Popcnt,      //!< Supports the POPCNT (population count) insn
 128             X86_Pse,         //!< Supports 4MB-pages (page size extension)
 129             X86_Rdrnd,       //!< RDRAND high-quality hardware random numbers
 130             X86_Rdtscp,      //!< Serializing rdtscp instruction available
 131             X86_Rtm,         //!< Restricted transactional memory
 132             X86_Sha,         //!< Intel SHA extensions
 133             X86_Sse2,        //!< SSE 2
 134             X86_Sse3,        //!< SSE 3
 135             X86_Sse4A,       //!< SSE 4A
 136             X86_Sse4_1,      //!< SSE 4.1
 137             X86_Sse4_2,      //!< SSE 4.2
 138             X86_Ssse3,       //!< Supplemental SSE3
 139             X86_Tdt,         //!< TSC deadline timer
 140             X86_X2Apic,      //!< Extended xAPIC Support
 141             X86_Xop,         //!< AMD extended instructions, only AMD for now
 142             Arm_Neon,        //!< 32-bit ARM NEON
 143             Arm_NeonAsimd,   //!< 64-bit ARM AArch64 Advanced SIMD
 144             Ibm_Qpx,         //!< IBM QPX SIMD (BlueGene/Q and later)
 145             Ibm_Vmx,         //!< IBM VMX SIMD (Altivec on Power6 and later)
 146             Ibm_Vsx,         //!< IBM VSX SIMD (Power7 and later)
 147             Fujitsu_HpcAce   //!< Fujitsu Sparc64 HPC-ACE
 148         };
 149
 150         /*! \libinternal \brief Entry with basic information for a single logical processor */
 151         struct LogicalProcessor
 152         {
 153             int socketRankInMachine; //!< Relative rank of the current socket in the system
 154             int coreRankInSocket;    //!< Relative rank of the current core in its socket
 155             int hwThreadRankInCore;  //!< Relative rank of logical processor in its core
 156         };
 157
 158     public:
 159         /*! \brief Perform detection and construct a CpuInfo class from the results.
 160          *
 161          *  \note The detection should generally be performed again in different
 162          *        contexts.  This might seem like overkill, but there
 163          *        are systems (e.g. Arm) where processors can go completely offline
 164          *        during deep sleep, so at least in theory it is good to have a
 165          *        possibility of forcing re-detection if necessary.
 166          */
 167         static CpuInfo detect();
 168
 169         /*! \brief Check what cpu information is available
 170          *
 171          *  The amount of cpu information that can be detected depends on the
 172          *  OS, compiler, and CPU, and on non-x86 platforms it can be fragile.
 173          *  Before basing decisions on the output or warning the user about
 174          *  optimizations, you want to check whether it was possible to detect
 175          *  the information you need.
 176          */
 177         SupportLevel
 178         supportLevel() const { return supportLevel_; }
 179
 180         /*! \brief Enumerated value for vendor */
 181         Vendor
 182         vendor() const { return vendor_; }
 183
 184         /*! \brief String description of vendor
 185          *
 186          *  \throws std::out_of_range if the vendor is not present in the internal
 187          *          map of vendor names. This can only happen if we extend the enum
 188          *          type but forget to add the string with the vendor name.
 189          */
 190         const std::string &
 191         vendorString() const
 192         {
 193             return s_vendorStrings_.at(vendor_);
 194         }
 195
 196         /*! \brief String description of processor */
 197         const std::string &
 198         brandString() const { return brandString_; }
 199
 200         /*! \brief Major version/generation of the processor */
 201         int
 202         family() const { return family_; }
 203
 204         /*! \brief Middle version of the processor */
 205         int
 206         model() const { return model_; }
 207
 208         /*! \brief Minor version of the processor */
 209         int
 210         stepping() const { return stepping_; }
 211
 212         /*! \brief Check for availability of specific feature
 213          *
 214          *  \param f  feature to query support for
 215          *
 216          *  \return True if the feature is available, otherwise false.
 217          */
 218         bool
 219         feature(Feature f) const
 220         {
 221             // If the entry is present in the set it is supported
 222             return (features_.count(f) != 0);
 223         }
 224
 225         /*! \brief String description of a specific feature
 226          *  \param f  feature whose name is looked up in the internal map
 227          *  \throws std::out_of_range if the feature is not present in the internal
 228          *          map of feature names. This can only happen if we extend the enum
 229          *          type but forget to add the string with the feature name.
 230          */
 231         static const std::string &
 232         featureString(Feature f)
 233         {
 234             return s_featureStrings_.at(f);
 235         }
 236
 237         /*! \brief Set of all supported features on this processor
 238          *
 239          *  This is only intended for logfiles, debugging or similar output when we
 240          *  need a full list of all the features available on the CPU.
 241          */
 242         const std::set<Feature> &
 243         featureSet() const
 244         {
 245             return features_;
 246         }
 247
 248         /*! \brief Reference to processing unit topology
 249          *
 250          *  Only a few systems (x86) provide logical processor information in cpuinfo.
 251          *  This method returns a reference to a vector, whose length will either be
 252          *  zero (if topology information is not available) or the number of enabled
 253          *  processing units, as defined by the operating system. In the latter
 254          *  case, each entry will contain information about the relative rank in the
 255          *  core and socket of this hardware thread.
 256          *
 257          *  This is only meant to be used as a fallback implementation for our
 258          *  HardwareTopology class; any user code that needs access to hardware
 259          *  topology information should use that class instead.
 260          *
 261          *  \note For clarity, it is likely better to use the supportLevel()
 262          *        method to check if this information is available rather than
 263          *        relying on the length of the vector.
 264          */
 265         const std::vector<LogicalProcessor> &
 266         logicalProcessors() const { return logicalProcessors_; }
 267
 268     private:
 269         CpuInfo(); //!< Private default constructor; detect() is the public way to create an object
 270
 271         SupportLevel                                 supportLevel_;      //!< Available cpuinfo information
 272         Vendor                                       vendor_;            //!< Value of vendor for current cpu
 273         std::string                                  brandString_;       //!< Text description of cpu
 274         int                                          family_;            //!< Major version of current cpu
 275         int                                          model_;             //!< Middle version of current cpu
 276         int                                          stepping_;          //!< Minor version of current cpu
 277         std::set<Feature>                            features_;          //!< Set of features supported on this cpu
 278         std::vector<LogicalProcessor>                logicalProcessors_; //!< Simple logical processor topology
 279         static const std::map<Vendor, std::string>   s_vendorStrings_;   //!< Text description of each vendor
 280         static const std::map<Feature, std::string>  s_featureStrings_;  //!< Text description of each feature
 281 };                                                                       // class CpuInfo
 282
 283 /*! \brief Return true if the CPU is an Intel x86 Nehalem
 284  *
 285  * \param cpuInfo  Object with cpu information, e.g. as returned by CpuInfo::detect()
 286  *
 287  * \returns  True if \p cpuInfo describes an Intel x86 Nehalem CPU, otherwise false
 288  */
 289 bool
 290 cpuIsX86Nehalem(const CpuInfo &cpuInfo);
 291
 292 }                                                                        // namespace gmx
 293
 294 #endif                                                                   // GMX_HARDWARE_CPUINFO_H