From cf2ece9134926e7e6c0f20bffd08a465a5bf7971 Mon Sep 17 00:00:00 2001
From: Erik Lindahl <erik@kth.se>
Date: Tue, 12 Dec 2017 20:29:41 +0100
Subject: [PATCH] Fix builds on ARM & clarify (ARM) GPU support

Fixed a typo in architecture.h that prevented
the Neon Asimd instructions from being selected,
and updated the CPU brand detection to also look
for the "model name" label used by Tegra X1 on Ubuntu 16.04.

Clarified in error messages and documentation that
Gromacs in fact does not build for all supported GPU
architectures by default, and explained the common
cases when things might fail, exactly what the user
should do to enable the support, and how the
support strings should be formatted.

Fixes #2287.

Change-Id: I87a2eb81ee11b78f072e3ef359a00c75eb7ec24b
---
 docs/install-guide/index.rst        | 23 ++++++++++++-----------
 src/gromacs/gpu_utils/gpu_utils.cu  | 12 ++++++------
 src/gromacs/hardware/architecture.h |  2 +-
 src/gromacs/hardware/cpuinfo.cpp    |  5 +++++
 4 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst
index cb7d54d65c..30c7660c15 100644
--- a/docs/install-guide/index.rst
+++ b/docs/install-guide/index.rst
@@ -601,17 +601,18 @@ this `NVIDIA blog post
 NVML support is only available if detected, and may be disabled by
 turning off the ``GMX_USE_NVML`` CMake advanced option.
 
-By default, optimized code will be generated for CUDA architectures
-supported by the nvcc compiler (and the |Gromacs| build system). 
-However, it can be beneficial to manually pick the specific CUDA architecture(s)
-to generate code for either to reduce compilation time (and binary size) or to
-target a new architecture not yet supported by the |Gromacs| build system.
-Setting the desired CUDA architecture(s) and virtual architecture(s)
-can be done using the ``GMX_CUDA_TARGET_SM`` and ``GMX_CUDA_TARGET_COMPUTE``
-variables, respectively. These take a semicolon delimited string with 
-the two digit suffixes of CUDA (virtual) architectures names
-(for details see the "Options for steering GPU code generation" section of the
-nvcc man / help or Chapter 6. of the nvcc manual).
+By default, code will be generated for the most common CUDA architectures.
+However, to reduce build time and binary size we do not generate code for
+every single possible architecture, which in rare cases (say, Tegra systems)
+can result in the default build not being able to use some GPUs.
+If this happens, or if you want to remove some architectures to reduce
+binary size and build time, you can alter the target CUDA architectures. 
+This can be done either with the ``GMX_CUDA_TARGET_SM`` or
+``GMX_CUDA_TARGET_COMPUTE`` CMake variables, which take a semicolon delimited
+string with the two-digit suffixes of CUDA (virtual) architecture names, for
+instance "35;50;52;53;60". For details, see the "Options for steering GPU
+code generation" section of the nvcc man / help or Chapter 6 of the nvcc
+manual.
 
 The GPU acceleration has been tested on AMD64/x86-64 platforms with
 Linux, Mac OS X and Windows operating systems, but Linux is the
diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu
index 84e918fc01..8aa59a45e5 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -113,12 +113,12 @@ static void checkCompiledTargetCompatibility(const gmx_device_info_t *devInfo)
     if (cudaErrorInvalidDeviceFunction == stat)
     {
         gmx_fatal(FARGS,
-                  "The %s binary was not compiled for the selected GPU "
-                  "(device ID #%d, compute capability %d.%d).\n"
-                  "When selecting target GPU architectures with GMX_CUDA_TARGET_SM, "
-                  "make sure to pass the appropriate architecture(s) corresponding to the "
-                  "device(s) intended to be used (see in the GPU info listing) or alternatively "
-                  "pass in GMX_CUDA_TARGET_COMPUTE an appropriate virtual architecture. ",
+                  "The %s binary does not include support for the CUDA architecture "
+                  "of the selected GPU (device ID #%d, compute capability %d.%d). "
+                  "By default, GROMACS supports all common architectures, so your GPU "
+                  "might be rare, or some architectures were disabled in the build. "
+                  "Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
+                  "GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture.",
                   gmx::getProgramContext().displayName(), devInfo->id,
                   devInfo->prop.major, devInfo->prop.minor);
     }
diff --git a/src/gromacs/hardware/architecture.h b/src/gromacs/hardware/architecture.h
index aeda6c3a91..fc897a077c 100644
--- a/src/gromacs/hardware/architecture.h
+++ b/src/gromacs/hardware/architecture.h
@@ -71,7 +71,7 @@ enum class Architecture
 static constexpr Architecture c_architecture =
 #if GMX_IS_X86_32 || GMX_IS_X86_64
     Architecture::X86;
-#elif defined __arm__ || defined __arm || defined _M_ARM || defined __aarch64_
+#elif defined __arm__ || defined __arm || defined _M_ARM || defined __aarch64__
     Architecture::Arm;
 #elif defined __powerpc__ || defined __ppc__ || defined __PPC__
     Architecture::PowerPC;
diff --git a/src/gromacs/hardware/cpuinfo.cpp b/src/gromacs/hardware/cpuinfo.cpp
index 273d8d8e2c..6f16a041c5 100644
--- a/src/gromacs/hardware/cpuinfo.cpp
+++ b/src/gromacs/hardware/cpuinfo.cpp
@@ -778,6 +778,11 @@ detectProcCpuInfoArm(const std::map<std::string, std::string>   &cpuInfo,
     {
         *brand = cpuInfo.at("Processor");
     }
+    else if (cpuInfo.count("model name"))
+    {
+        *brand = cpuInfo.at("model name");
+    }
+
     if (cpuInfo.count("CPU architecture"))
     {
         *family = std::strtol(cpuInfo.at("CPU architecture").c_str(), nullptr, 10);
-- 
2.11.4.GIT