1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2010, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
40 #include "cuda_runtime_api.h"
42 #include "memtestG80_core.h"
52 #define DUPME(msg) printf("---> %s\n", msg);
58 #if _DEBUG_ == 0/* no gromacs utils in debug mode */
59 #include "gmx_fatal.h"
/* Parameters of the predefined memtest flavours.
 * The test-set macros are fully parenthesized so that they expand as a single
 * operand inside larger expressions (e.g. `QUICK_TESTS & flag`); without the
 * parentheses `&` would bind only to the last flag of the `|` chain. */
#define QUICK_MEM   250 /*!< Amount of memory (in MB) to be used in quick memtest. */
#define QUICK_TESTS (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS) /*!< Bitflag with type of tests to run in quick memtest. */
#define QUICK_ITER  3 /*!< Number of iterations in quick memtest. */

#define FULL_TESTS  0x3FFF /*!< Bitflag with all tests set on for full memtest. */
#define FULL_ITER   25 /*!< Number of iterations in full memtest. */

#define TIMED_TESTS (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS) /*!< Bitflag with type of tests to run in time constrained memtest. */

/*! Number of supported GPUs */
#define NB_GPUS (sizeof(SupportedGPUs)/sizeof(SupportedGPUs[0]))
78 TODO add proper gromacs logging?
81 /*! Bit-flags which refer to memtestG80 test types and are used in do_memtest to specify which tests to run. */
82 enum memtest_G80_test_types {
83 MOVING_INVERSIONS_10 = 0x1,
84 MOVING_INVERSIONS_RAND = 0x2,
85 WALKING_8BIT_M86 = 0x4,
87 WALKING_1_8BIT = 0x10,
88 WALKING_0_32BIT = 0x20,
89 WALKING_1_32BIT = 0x40,
94 LOGIC_1_ITER_SHMEM = 0x800,
95 LOGIC_4_ITER_SHMEM = 0x1000
98 // TODO put this list into an external file and include it so that the list is easily accessible
99 /*! List of supported GPUs. */
100 static const char * const SupportedGPUs[] = {
129 "Quadro Plex 2200 D2",
130 "Quadro Plex 2200 S4",
133 "GeForce 9800 G", /* GX2, GTX, GTX+, GT */
137 "Quadro Plex 2100 D4"
142 /* debug functions, see @the end */
146 int gmx_strncasecmp(const char*, const char*, int);
152 * \brief Runs GPU sanity checks.
153 * Returns the properties of the device with the given id, or of the one that has
154 * already been initialized earlier if dev_id == -1.
156 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
157 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
159 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
164 cu_err = cudaGetDeviceCount(&dev_count);
165 if (cu_err != cudaSuccess)
167 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
168 cudaGetErrorString(cu_err));
172 /* no CUDA compatible device at all */
176 /* things might go horribly wrong if cudart is not compatible with the driver */
177 if (dev_count < 0 || dev_count > 20)
180 if (dev_id == -1) /* device already selected let's do not destroy the context */
182 cu_err = cudaGetDevice(&id);
183 if (cu_err != cudaSuccess)
185 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
186 cudaGetErrorString(cu_err));
193 if (id > dev_count - 1) /* pfff there's no such device */
195 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
201 memset(dev_prop, 0, sizeof(cudaDeviceProp));
202 cu_err = cudaGetDeviceProperties(dev_prop, id);
203 if (cu_err != cudaSuccess)
205 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
206 cudaGetErrorString(cu_err));
210 /* both major & minor is 9999 if no CUDA capable devices are present */
211 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
213 /* we don't care about emulation mode */
214 if (dev_prop->major == 0)
217 if ((dev_id != -1) && (cu_err = cudaSetDevice(dev_id)) != cudaSuccess)
219 fprintf(stderr, "Error %d while switching to device #%d: %s\n", cu_err, dev_id,
220 cudaGetErrorString(cu_err));
228 * \brief Checks whether the GPU with the given name is supported.
230 * \param[in] gpu_name the name of the CUDA device
231 * \returns 1 if the device is supported, otherwise 0
233 static int is_supported_gpu_n(char *gpuName)
236 for (i = 0; i < NB_GPUS; i++)
239 if (gmx_strncasecmp(gpuName, SupportedGPUs[i], strlen(SupportedGPUs[i])) == 0)
245 /*! \brief Checks whether the GPU with the given device id is supported.
247 * \param[in] dev_id the device id of the GPU or -1 if the device has laredy been selected
248 * \param[out] gpu_name Set to contain the name of the CUDA device, if NULL passed, no device name is set.
249 * \returns 1 if the device is supported, otherwise 0
251 int is_supported_cuda_gpu(int dev_id, char *gpu_name)
253 cudaDeviceProp dev_prop;
255 if (debug) fprintf(debug, "Checking compatibility with device #%d, %s\n", dev_id, gpu_name);
257 if (do_sanity_checks(dev_id, &dev_prop) != 0)
260 if (gpu_name != NULL)
262 strcpy(gpu_name, dev_prop.name);
264 return is_supported_gpu_n(dev_prop.name);
269 * \brief Runs a set of memory tests specified by the given bit-flags.
270 * Tries to allocate and do the test on \p megs Mb memory or
271 * the greatest amount that can be allocated (>10Mb).
272 * If an error is detected it stops without finishing the remaining
273 * steps/iterations and returns a value greater than zero.
274 * In case of other errors (e.g. kernel launch errors, device querying errors)
277 * \param[in] which_tests variable with bit-flags of the requested tests
278 * \param[in] megs amount of memory that will be tested in MB
279 * \param[in] iter number of iterations
280 * \returns 0 if no error was detected, otherwise >0
282 static int do_memtest(unsigned int which_tests, int megs, int iter)
286 uint err_count; //, err_iter;
288 // no parameter check as this fn won't be called externally
290 // let's try to allocate the mem
291 while (!tester.allocate(megs) && (megs - 10 > 0))
292 { megs -= 10; tester.deallocate(); }
296 fprintf(stderr, "Unable to allocate GPU memory!\n");
300 // keep only the low 14 bits (mask 0x3FFF); all higher bits are cleared
301 which_tests &= 0x3FFF;
302 for (i = 0; i < iter; i++)
304 // Moving Inversions (ones and zeros)
305 if ((MOVING_INVERSIONS_10 & which_tests) == MOVING_INVERSIONS_10)
307 tester.gpuMovingInversionsOnesZeros(err_count);
309 return MOVING_INVERSIONS_10;
311 // Moving Inversions (random)
312 if ((MOVING_INVERSIONS_RAND & which_tests) == MOVING_INVERSIONS_RAND)
314 tester.gpuMovingInversionsRandom(err_count);
316 return MOVING_INVERSIONS_RAND;
318 // Memtest86 Walking 8-bit
319 if ((WALKING_8BIT_M86 & which_tests) == WALKING_8BIT_M86)
321 for (uint shift = 0; shift < 8; shift++)
323 tester.gpuWalking8BitM86(err_count, shift);
325 return WALKING_8BIT_M86;
328 // True Walking zeros (8-bit)
329 if ((WALKING_0_8BIT & which_tests) == WALKING_0_8BIT)
331 for (uint shift = 0; shift < 8; shift++)
333 tester.gpuWalking8Bit(err_count, false, shift);
335 return WALKING_0_8BIT;
338 // True Walking ones (8-bit)
339 if ((WALKING_1_8BIT & which_tests) == WALKING_1_8BIT)
341 for (uint shift = 0; shift < 8; shift++)
343 tester.gpuWalking8Bit(err_count, true, shift);
345 return WALKING_1_8BIT;
348 // Memtest86 Walking zeros (32-bit)
349 if ((WALKING_0_32BIT & which_tests) == WALKING_0_32BIT)
351 for (uint shift = 0; shift < 32; shift++)
353 tester.gpuWalking32Bit(err_count, false, shift);
355 return WALKING_0_32BIT;
358 // Memtest86 Walking ones (32-bit)
359 if ((WALKING_1_32BIT & which_tests) == WALKING_1_32BIT)
361 for (uint shift = 0; shift < 32; shift++)
363 tester.gpuWalking32Bit(err_count, true, shift);
365 return WALKING_1_32BIT;
369 if ((RANDOM_BLOCKS & which_tests) == RANDOM_BLOCKS)
371 tester.gpuRandomBlocks(err_count,rand());
373 return RANDOM_BLOCKS;
377 // Memtest86 Modulo-20
378 if ((MOD_20_32BIT & which_tests) == MOD_20_32BIT)
380 for (uint shift = 0; shift < 20; shift++)
382 tester.gpuModuloX(err_count, shift, rand(), 20, 2);
387 // Logic (one iteration)
388 if ((LOGIC_1_ITER & which_tests) == LOGIC_1_ITER)
390 tester.gpuShortLCG0(err_count,1);
394 // Logic (4 iterations)
395 if ((LOGIC_4_ITER & which_tests) == LOGIC_4_ITER)
397 tester.gpuShortLCG0(err_count,4);
402 // Logic (shared memory, one iteration)
403 if ((LOGIC_1_ITER_SHMEM & which_tests) == LOGIC_1_ITER_SHMEM)
405 tester.gpuShortLCG0Shmem(err_count,1);
407 return LOGIC_1_ITER_SHMEM;
409 // Logic (shared-memory, 4 iterations)
410 if ((LOGIC_4_ITER_SHMEM & which_tests) == LOGIC_4_ITER_SHMEM)
412 tester.gpuShortLCG0Shmem(err_count,4);
414 return LOGIC_4_ITER_SHMEM;
422 /*! \brief Runs a quick memory test and returns 0 if no error is detected.
423 * If an error is detected it stops before completing the test and returns a
424 * value greater than 0. In case of other errors (e.g. kernel launch errors,
425 * device querying errors) -1 is returned.
427 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
428 * \returns 0 if no error was detected, otherwise >0
430 int do_quick_memtest(int dev_id)
432 cudaDeviceProp dev_prop;
433 int devmem, res, time=0;
435 if (debug) { time = getTimeMilliseconds(); }
437 if (do_sanity_checks(dev_id, &dev_prop) != 0)
439 // something went wrong
445 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
446 fprintf(debug, ">> Running QUICK memtests on %d MiB (out of total %d MiB), %d iterations\n",
447 QUICK_MEM, devmem, QUICK_ITER);
450 res = do_memtest(QUICK_TESTS, QUICK_MEM, QUICK_ITER);
454 fprintf(debug, "Q-RES = %d\n", res);
455 fprintf(debug, "Q-runtime: %d ms\n", getTimeMilliseconds() - time);
458 /* destroy context only if we created it */
459 if (dev_id !=-1) cudaThreadExit();
463 /*! \brief Runs a full memory test and returns 0 if no error is detected.
464 * If an error is detected it stops before completing the test and returns a
465 * value greater than 0. In case of other errors (e.g. kernel launch errors,
466 * device querying errors) -1 is returned.
468 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
469 * \returns 0 if no error was detected, otherwise >0
472 int do_full_memtest(int dev_id)
474 cudaDeviceProp dev_prop;
475 int devmem, res, time=0;
477 if (debug) { time = getTimeMilliseconds(); }
479 if (do_sanity_checks(dev_id, &dev_prop) != 0)
481 // something went wrong
485 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
489 fprintf(debug, ">> Running FULL memtests on %d MiB (out of total %d MiB), %d iterations\n",
490 devmem, devmem, FULL_ITER);
493 /* do all test on the entire memory */
494 res = do_memtest(FULL_TESTS, devmem, FULL_ITER);
498 fprintf(debug, "F-RES = %d\n", res);
499 fprintf(debug, "F-runtime: %d ms\n", getTimeMilliseconds() - time);
502 /* destroy context only if we created it */
503 if (dev_id != -1) cudaThreadExit();
507 /*! \brief Runs a time constrained memory test and returns 0 if no error is detected.
508 * If an error is detected it stops before completing the test and returns a value greater
509 * than zero. In case of other errors (e.g. kernel launch errors, device querying errors) -1
510 * is returned. Note that test iterations are not interrupted, therefore the total runtime of
511 * the test will always be a multiple of one iteration's runtime.
513 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
514 * \param[in] time_constr the time limit of the testing, in seconds
515 * \returns 0 if no error was detected, otherwise >0
517 int do_timed_memtest(int dev_id, int time_constr)
519 cudaDeviceProp dev_prop;
520 int devmem, res=0, time=0, startt;
522 if (debug) { time = getTimeMilliseconds(); }
524 time_constr *= 1000; /* convert to ms for convenience */
525 startt = getTimeMilliseconds();
527 if (do_sanity_checks(dev_id, &dev_prop) != 0)
529 // something went wrong
533 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
537 fprintf(debug, ">> Running time constrained memtests on %d MiB (out of total %d MiB), time limit of %d s \n",
538 devmem, devmem, time_constr);
541 /* do the TIMED_TESTS set, one step at a time on the entire memory
542 that can be allocated, and stop when the given time is exceeded */
543 while ( ((int)getTimeMilliseconds() - startt) < time_constr)
545 res = do_memtest(TIMED_TESTS, devmem, 1);
551 fprintf(debug, "T-RES = %d\n", res);
552 fprintf(debug, "T-runtime: %d ms\n", getTimeMilliseconds() - time);
555 /* destroy context only if we created it */
556 if (dev_id != -1) cudaThreadExit();
562 /*******************************************************
563 * The code below is for testing purposes. */
564 int do_custom_memtest(int dev_id)
566 cudaDeviceProp dev_prop;
567 int mem2test, /*devmem,*/ res;
568 // memtestState tester;
572 int time = getTimeMilliseconds();
575 if (do_sanity_checks(dev_id, &dev_prop) != 0)
578 // if ((res=tester.allocate(100))==0)
579 // printf("alloc failed\n");
580 // printf("alloc res = %d\n", res);
581 // res = tester.gpuMemoryBandwidth(bandwidth, tester.size(), 10);
582 // printf("Bandwidth on %d (res %d)= %5.2f\n", tester.size(), res, bandwidth);
583 // tester.deallocate();
585 // devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
589 printf(">> Running CUSTOM memtests [%x] on %d MiB, %d iterations\n",
590 QUICK_TESTS, mem2test, 1);
593 res = do_memtest(QUICK_TESTS, mem2test, 1);
597 printf("C-RES = %d\n", res);
598 printf("C-runtime: %d ms\n", getTimeMilliseconds() - time);
605 * Only for debugging purposes, compile with:
606 * nvcc -DLINUX -D_DEBUG_=2 -L -O -Xcompiler -Wall memtestG80_core.o gmx_gpu_utils.cu -o gmx_gpu_utils_test
608 int main( int argc, char** argv)
612 sprintf(msg, "Device #%d supported: ", dev_id);
613 switch (is_supported_cuda_gpu(dev_id, NULL))
615 case -1: strcat(msg, "error occured"); break;
616 case 0: strcat(msg, "no"); break;
617 case 1: strcat(msg, "yes"); break;
618 default: strcat(msg, "\nhmmm, you should not see this!");
622 printf("Doing memtest.\n");
623 printf("quick memtest result: %d\n", do_quick_memtest(dev_id));
624 printf("timed memtest result: %d\n", do_timed_memtest(dev_id, 15));
625 printf("full memtest result: %d\n", do_full_memtest(dev_id));
634 Functions only used if this file is compiled in debug mode (_DEBUG_ > 0)
635 when the gromacs versions are not available.
636 - string trimming function - duplicated from ~/src/gmxlib/string2.c
637 - case-insensitive string compare
639 static void ltrim (char *str)
649 while ((tr[c] == ' ') || (tr[c] == '\t'))
656 static void rtrim (char *str)
664 while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) ) {
670 static void trim (char *str)
676 static int gmx_strncasecmp(const char* s1, const char* s2, int len)
678 return strncasecmp(s1, s2, len);