From 29ba77b8483f803766806b4f6987aeeef00747e5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= <pall.szilard@gmail.com>
Date: Mon, 16 Oct 2017 17:40:23 +0200
Subject: [PATCH] Check CUDA available/compiled code compatibility

Added an early check to detect when the gmx binary does not embed code
compatible with the GPU device it tries to use nor does it have PTX that
could have been JIT-ed.

Additionally, if the user manually sets GMX_CUDA_TARGET_COMPUTE=20 and
no later SM or COMPUTE but runs on >2.0 hardware, we'd be executing
JIT-ed Fermi kernels with incorrect host-side code assumptions
(e.g amount of shared memory allocated or texture type).
This change also prevents such cases.

Fixes #2273

Change-Id: I5472b1a33e584a75f451e21e9fd25992633fbea9
---
 src/gromacs/gpu_utils/gpu_utils.cu | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu
index 443aa70540..5e0afd3f2b 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -55,7 +55,9 @@
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/logger.h"
+#include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/snprintf.h"
 
@@ -99,6 +101,38 @@ static __global__ void k_dummy_test(void)
 {
 }
 
+static void checkCompiledTargetCompatibility(const gmx_device_info_t *devInfo)
+{
+    assert(devInfo);
+
+    cudaFuncAttributes attributes;
+    cudaError_t        stat = cudaFuncGetAttributes(&attributes, k_dummy_test);
+
+    if (cudaErrorInvalidDeviceFunction == stat)
+    {
+        gmx_fatal(FARGS,
+                  "The %s binary was not compiled for the selected GPU "
+                  "(device ID #%d, compute capability %d.%d).\n"
+                  "When selecting target GPU architectures with GMX_CUDA_TARGET_SM, "
+                  "make sure to pass the appropriate architecture(s) corresponding to the "
+                  "device(s) intended to be used (see in the GPU info listing) or alternatively "
+                  "pass in GMX_CUDA_TARGET_COMPUTE an appropriate virtual architecture. ",
+                  gmx::getProgramContext().displayName(), devInfo->id,
+                  devInfo->prop.major, devInfo->prop.minor);
+    }
+
+    CU_RET_ERR(stat, "cudaFuncGetAttributes failed");
+
+    if (devInfo->prop.major >= 3 && attributes.ptxVersion < 30)
+    {
+        gmx_fatal(FARGS,
+                  "The GPU device code was compiled at runtime from 2.0 source which is "
+                  "not compatible with the selected GPU (device ID #%d, compute capability %d.%d). "
+                  "Pass the appropriate target in GMX_CUDA_TARGET_SM or a >=30 value to GMX_CUDA_TARGET_COMPUTE.",
+                  devInfo->id,
+                  devInfo->prop.major, devInfo->prop.minor);
+    }
+}
 
 /*!
  * \brief Runs GPU sanity checks.
@@ -469,6 +503,8 @@ void init_gpu(const gmx::MDLogger &mdlog, int rank,
         fprintf(stderr, "Initialized GPU ID #%d: %s\n", deviceInfo->id, deviceInfo->prop.name);
     }
 
+    checkCompiledTargetCompatibility(deviceInfo);
+
     //Ignoring return value as NVML errors should be treated not critical.
     init_gpu_application_clocks(mdlog, deviceInfo);
 }
-- 
2.11.4.GIT