1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2010, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
40 #include "cuda_runtime_api.h"
42 #include "memtestG80_core.h"
52 #define DUPME(msg) printf("---> %s\n", msg);
58 #if _DEBUG_ == 0/* no gromacs utils in debug mode */
59 #include "gmx_fatal.h"
63 #define QUICK_MEM 250 /*!< Amount of memory (passed as the megs argument of do_memtest) to be used in quick memtest. */
/* The flag combinations below are parenthesized so that they expand as a single
   operand: without the parentheses an expression such as (QUICK_TESTS & x)
   would bind the & to the last flag only. */
64 #define QUICK_TESTS (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS) /*!< Bitflag with type of tests
65 to run in quick memtest. */
66 #define QUICK_ITER 3 /*!< Number of iterations in quick memtest. */
68 #define FULL_TESTS 0x3FFF /*!< Bitflag with all tests set on for full memtest. */
69 #define FULL_ITER 25 /*!< Number of iterations in full memtest. */
71 #define TIMED_TESTS (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS) /*!< Bitflag with type of tests to
72 run in time constrained memtest. */
74 /*! Number of supported GPUs */
75 #define NB_GPUS (sizeof(SupportedGPUs)/sizeof(SupportedGPUs[0]))
78 TODO add proper gromacs logging?
81 /*! Bit-flags which refer to memtestG80 test types and are used in do_memtest to specify which tests to run.
    Each enumerator is a distinct single bit, so values can be OR-ed together; do_memtest masks the
    combined value with 0x3FFF before testing individual flags. */
82 enum memtest_G80_test_types {
83 MOVING_INVERSIONS_10 = 0x1, /*!< Moving Inversions (ones and zeros) */
84 MOVING_INVERSIONS_RAND = 0x2, /*!< Moving Inversions (random) */
85 WALKING_8BIT_M86 = 0x4, /*!< Memtest86 Walking 8-bit */
87 WALKING_1_8BIT = 0x10, /*!< True Walking ones (8-bit) */
88 WALKING_0_32BIT = 0x20, /*!< Memtest86 Walking zeros (32-bit) */
89 WALKING_1_32BIT = 0x40, /*!< Memtest86 Walking ones (32-bit) */
94 LOGIC_1_ITER_SHMEM = 0x800, /*!< Logic (shared memory, one iteration) */
95 LOGIC_4_ITER_SHMEM = 0x1000 /*!< Logic (shared memory, 4 iterations) */
98 // TODO put this list into an external file and include it so that the list is easily accessible
99 /*! List of supported GPUs.
    is_supported_gpu_n compares only the first strlen(entry) characters of the device name
    against each entry, so an entry acts as a name prefix and may cover several models. */
100 static const char * const SupportedGPUs[] = {
136 "Quadro Plex 2200 D2",
137 "Quadro Plex 2200 S4",
140 "GeForce 9800 G", /* prefix entry covering GX2, GTX, GTX+, GT */
144 "Quadro Plex 2100 D4"
149 /* debug functions, defined at the end of this file */
/* NOTE(review): this prototype has external linkage, while the definition at the
   end of the file is declared static; confirm the intended linkage and make the
   declaration and the definition agree. */
153 int gmx_strncasecmp(const char*, const char*, int);
159 * \brief Runs GPU sanity checks.
160 * Returns the properties of the device with the given id, or of the one that has
161 * already been initialized earlier if dev_id == -1.
163 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
164 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
/* Checks that a usable CUDA device is available and fills *dev_prop with its
   properties. Failures of the CUDA runtime calls are reported on stderr.
   Callers in this file treat any non-zero return value as failure. */
166 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
171 cu_err = cudaGetDeviceCount(&dev_count);
172 if (cu_err != cudaSuccess)
174 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
175 cudaGetErrorString(cu_err));
179 /* no CUDA compatible device at all */
183 /* things might go horribly wrong if cudart is not compatible with the driver */
/* NOTE(review): the upper bound of 20 devices is a plausibility limit chosen by
   the author, not a CUDA constant. */
184 if (dev_count < 0 || dev_count > 20)
187 if (dev_id == -1) /* device already selected: query the current one and leave its context alone */
189 cu_err = cudaGetDevice(&id);
190 if (cu_err != cudaSuccess)
192 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
193 cudaGetErrorString(cu_err));
/* Only the upper bound of the id is checked here. */
200 if (id > dev_count - 1) /* there is no such device */
202 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
/* Zero the output structure before the query. */
208 memset(dev_prop, 0, sizeof(cudaDeviceProp));
209 cu_err = cudaGetDeviceProperties(dev_prop, id);
210 if (cu_err != cudaSuccess)
212 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
213 cudaGetErrorString(cu_err));
217 /* both major & minor is 9999 if no CUDA capable devices are present */
218 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
220 /* we don't care about emulation mode */
221 if (dev_prop->major == 0)
/* Make the requested device current only when an explicit id was given; with
   dev_id == -1 cudaSetDevice is not called. */
224 if ((dev_id != -1) && (cu_err = cudaSetDevice(dev_id)) != cudaSuccess)
226 fprintf(stderr, "Error %d while switching to device #%d: %s\n", cu_err, dev_id,
227 cudaGetErrorString(cu_err));
235 * \brief Checks whether the GPU with the given name is supported.
237 * \param[in] gpuName the name of the CUDA device
238 * \returns 1 if the device is supported, otherwise 0
/* Looks gpuName up in the SupportedGPUs table. */
240 static int is_supported_gpu_n(char *gpuName)
/* NB_GPUS is the number of entries in SupportedGPUs. */
243 for (i = 0; i < NB_GPUS; i++)
/* Only the first strlen(SupportedGPUs[i]) characters are compared, so a table
   entry matches every device name that starts with it. */
246 if (gmx_strncasecmp(gpuName, SupportedGPUs[i], strlen(SupportedGPUs[i])) == 0)
252 /*! \brief Checks whether the GPU with the given device id is supported.
254 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
255 * \param[out] gpu_name Set to contain the name of the CUDA device, if NULL passed, no device name is set.
256 * \returns 1 if the device is supported, otherwise 0
/* Runs the sanity checks for the device, optionally copies the device name out
   to the caller and reports whether that name is in the supported list. */
258 int is_supported_cuda_gpu(int dev_id, char *gpu_name)
260 cudaDeviceProp dev_prop;
/* gpu_name is an output parameter: it may be NULL and holds no meaningful text
   until it is filled in below, so it must not be passed to a %s conversion
   here. Only the device id is logged. */
262 if (debug) fprintf(debug, "Checking compatibility with device #%d\n", dev_id);
264 if (do_sanity_checks(dev_id, &dev_prop) != 0)
267 if (gpu_name != NULL)
/* NOTE(review): unbounded copy; the caller's buffer must be large enough for
   dev_prop.name including the terminating NUL. */
269 strcpy(gpu_name, dev_prop.name);
271 return is_supported_gpu_n(dev_prop.name);
276 * \brief Runs a set of memory tests specified by the given bit-flags.
277 * Tries to allocate and do the test on \p megs Mb memory or
278 * the greatest amount that can be allocated (>10Mb).
279 * If an error is detected it stops without finishing the remaining
280 * steps/iterations and returns a value greater than zero.
281 * In case of other errors (e.g. kernel launch errors, device querying errors)
284 * \param[in] which_tests variable with bit-flags of the requested tests
285 * \param[in] megs amount of memory that will be tested in MB
286 * \param[in] iter number of iterations
287 * \returns 0 if no error was detected, otherwise >0
/* Runs the tests selected by the bit-flags in which_tests, iter times, on up to
   megs MB of device memory. Where a return statement is visible below, the value
   returned is the flag of the test concerned. */
289 static int do_memtest(unsigned int which_tests, int megs, int iter)
293 uint err_count; //, err_iter;
295 // no parameter check as this fn won't be called externally
297 // try to allocate the memory; on failure retry with 10 MB less each time
298 while (!tester.allocate(megs) && (megs - 10 > 0))
299 { megs -= 10; tester.deallocate(); }
303 fprintf(stderr, "Unable to allocate GPU memory!\n");
307 // keep only the 14 low-order flag bits; every higher bit is cleared
308 which_tests &= 0x3FFF;
309 for (i = 0; i < iter; i++)
311 // Moving Inversions (ones and zeros)
312 if ((MOVING_INVERSIONS_10 & which_tests) == MOVING_INVERSIONS_10)
314 tester.gpuMovingInversionsOnesZeros(err_count);
316 return MOVING_INVERSIONS_10;
318 // Moving Inversions (random)
319 if ((MOVING_INVERSIONS_RAND & which_tests) == MOVING_INVERSIONS_RAND)
321 tester.gpuMovingInversionsRandom(err_count);
323 return MOVING_INVERSIONS_RAND;
325 // Memtest86 Walking 8-bit
326 if ((WALKING_8BIT_M86 & which_tests) == WALKING_8BIT_M86)
// one pass per shift value 0..7
328 for (uint shift = 0; shift < 8; shift++)
330 tester.gpuWalking8BitM86(err_count, shift);
332 return WALKING_8BIT_M86;
335 // True Walking zeros (8-bit)
336 if ((WALKING_0_8BIT & which_tests) == WALKING_0_8BIT)
338 for (uint shift = 0; shift < 8; shift++)
340 tester.gpuWalking8Bit(err_count, false, shift);
342 return WALKING_0_8BIT;
345 // True Walking ones (8-bit)
346 if ((WALKING_1_8BIT & which_tests) == WALKING_1_8BIT)
348 for (uint shift = 0; shift < 8; shift++)
350 tester.gpuWalking8Bit(err_count, true, shift);
352 return WALKING_1_8BIT;
355 // Memtest86 Walking zeros (32-bit)
356 if ((WALKING_0_32BIT & which_tests) == WALKING_0_32BIT)
// one pass per shift value 0..31
358 for (uint shift = 0; shift < 32; shift++)
360 tester.gpuWalking32Bit(err_count, false, shift);
362 return WALKING_0_32BIT;
365 // Memtest86 Walking ones (32-bit)
366 if ((WALKING_1_32BIT & which_tests) == WALKING_1_32BIT)
368 for (uint shift = 0; shift < 32; shift++)
370 tester.gpuWalking32Bit(err_count, true, shift);
372 return WALKING_1_32BIT;
// Random blocks; the second argument is taken from rand()
// NOTE(review): no srand() call is visible in this file view - confirm whether a fixed sequence is intended
376 if ((RANDOM_BLOCKS & which_tests) == RANDOM_BLOCKS)
378 tester.gpuRandomBlocks(err_count,rand());
380 return RANDOM_BLOCKS;
384 // Memtest86 Modulo-20
385 if ((MOD_20_32BIT & which_tests) == MOD_20_32BIT)
// one pass per shift value 0..19
387 for (uint shift = 0; shift < 20; shift++)
389 tester.gpuModuloX(err_count, shift, rand(), 20, 2);
394 // Logic (one iteration)
395 if ((LOGIC_1_ITER & which_tests) == LOGIC_1_ITER)
397 tester.gpuShortLCG0(err_count,1);
401 // Logic (4 iterations)
402 if ((LOGIC_4_ITER & which_tests) == LOGIC_4_ITER)
404 tester.gpuShortLCG0(err_count,4);
409 // Logic (shared memory, one iteration)
410 if ((LOGIC_1_ITER_SHMEM & which_tests) == LOGIC_1_ITER_SHMEM)
412 tester.gpuShortLCG0Shmem(err_count,1);
414 return LOGIC_1_ITER_SHMEM;
416 // Logic (shared-memory, 4 iterations)
417 if ((LOGIC_4_ITER_SHMEM & which_tests) == LOGIC_4_ITER_SHMEM)
419 tester.gpuShortLCG0Shmem(err_count,4);
421 return LOGIC_4_ITER_SHMEM;
429 /*! \brief Runs a quick memory test and returns 0 if no error is detected.
430 * If an error is detected it stops before completing the test and returns a
431 * value greater than 0. In case of other errors (e.g. kernel launch errors,
432 * device querying errors) -1 is returned.
434 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
435 * \returns 0 if no error was detected, >0 if a memory error was detected, -1 on other errors
/* Quick test: QUICK_TESTS on QUICK_MEM MB for QUICK_ITER iterations. */
437 int do_quick_memtest(int dev_id)
439 cudaDeviceProp dev_prop;
440 int devmem, res, time=0;
/* The start time is sampled only when debug output is enabled; time stays 0 otherwise. */
442 if (debug) { time = getTimeMilliseconds(); }
444 if (do_sanity_checks(dev_id, &dev_prop) != 0)
446 // something went wrong
452 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
/* NOTE(review): confirm that the fprintf(debug, ...) calls in this function are
   guarded by a check that debug is non-NULL (the guard is not visible here). */
453 fprintf(debug, ">> Running QUICK memtests on %d MiB (out of total %d MiB), %d iterations\n",
454 QUICK_MEM, devmem, QUICK_ITER);
457 res = do_memtest(QUICK_TESTS, QUICK_MEM, QUICK_ITER);
461 fprintf(debug, "Q-RES = %d\n", res);
462 fprintf(debug, "Q-runtime: %d ms\n", getTimeMilliseconds() - time);
465 /* destroy context only if we created it */
/* NOTE(review): cudaThreadExit() is deprecated in current CUDA toolkits in favour
   of cudaDeviceReset(); confirm the minimum toolkit version before switching. */
466 if (dev_id !=-1) cudaThreadExit();
470 /*! \brief Runs a full memory test and returns 0 if no error is detected.
471 * If an error is detected it stops before completing the test and returns a
472 * value greater than 0. In case of other errors (e.g. kernel launch errors,
473 * device querying errors) -1 is returned.
475 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
476 * \returns 0 if no error was detected, >0 if a memory error was detected, -1 on other errors
/* Full test: FULL_TESTS for FULL_ITER iterations, requesting the whole device
   memory (devmem MiB) from do_memtest. */
479 int do_full_memtest(int dev_id)
481 cudaDeviceProp dev_prop;
482 int devmem, res, time=0;
/* The start time is sampled only when debug output is enabled; time stays 0 otherwise. */
484 if (debug) { time = getTimeMilliseconds(); }
486 if (do_sanity_checks(dev_id, &dev_prop) != 0)
488 // something went wrong
492 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
/* NOTE(review): confirm that the fprintf(debug, ...) calls in this function are
   guarded by a check that debug is non-NULL (the guard is not visible here). */
496 fprintf(debug, ">> Running FULL memtests on %d MiB (out of total %d MiB), %d iterations\n",
497 devmem, devmem, FULL_ITER);
500 /* do all tests on the entire memory */
501 res = do_memtest(FULL_TESTS, devmem, FULL_ITER);
505 fprintf(debug, "F-RES = %d\n", res);
506 fprintf(debug, "F-runtime: %d ms\n", getTimeMilliseconds() - time);
509 /* destroy context only if we created it */
/* NOTE(review): cudaThreadExit() is deprecated in current CUDA toolkits in favour
   of cudaDeviceReset(); confirm the minimum toolkit version before switching. */
510 if (dev_id != -1) cudaThreadExit();
514 /*! \brief Runs a time constrained memory test and returns 0 if no error is detected.
515 * If an error is detected it stops before completing the test and returns a value greater
516 * than zero. In case of other errors (e.g. kernel launch errors, device querying errors) -1
517 * is returned. Note that test iterations are not interrupted, therefore the total runtime of
518 * the test will always be a multiple of one iteration's runtime.
520 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
521 * \param[in] time_constr the time limit of the testing, in seconds
522 * \returns 0 if no error was detected, >0 if a memory error was detected, -1 on other errors
/* Time constrained test: repeats single iterations of TIMED_TESTS on the whole
   device memory until time_constr seconds have elapsed. */
524 int do_timed_memtest(int dev_id, int time_constr)
526 cudaDeviceProp dev_prop;
527 int devmem, res=0, time=0, startt;
/* The start time for the runtime report is sampled only when debug output is enabled. */
529 if (debug) { time = getTimeMilliseconds(); }
531 time_constr *= 1000; /* convert to ms for convenience */
/* The clock starts before the sanity checks, so their duration counts against the limit. */
532 startt = getTimeMilliseconds();
534 if (do_sanity_checks(dev_id, &dev_prop) != 0)
536 // something went wrong
540 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
/* time_constr holds milliseconds at this point; convert back so the value
   printed matches the "s" unit in the message. */
544 fprintf(debug, ">> Running time constrained memtests on %d MiB (out of total %d MiB), time limit of %d s \n",
545 devmem, devmem, time_constr / 1000);
548 /* do the TIMED_TESTS set, one step at a time on the entire memory
549 that can be allocated, and stop when the given time is exceeded */
550 while ( ((int)getTimeMilliseconds() - startt) < time_constr)
552 res = do_memtest(TIMED_TESTS, devmem, 1);
558 fprintf(debug, "T-RES = %d\n", res);
559 fprintf(debug, "T-runtime: %d ms\n", getTimeMilliseconds() - time);
562 /* destroy context only if we created it */
563 if (dev_id != -1) cudaThreadExit();
569 /*******************************************************
570 * The code below is for testing purposes. */
/* Testing aid: runs one iteration of QUICK_TESTS on mem2test MiB and prints the
   result and the runtime to stdout. */
571 int do_custom_memtest(int dev_id)
573 cudaDeviceProp dev_prop;
/* NOTE(review): the assignment to mem2test is not visible in this view; confirm
   it is set before it is used below. */
574 int mem2test, /*devmem,*/ res;
575 // memtestState tester;
579 int time = getTimeMilliseconds();
582 if (do_sanity_checks(dev_id, &dev_prop) != 0)
/* Disabled bandwidth-measurement experiment, kept for reference. */
585 // if ((res=tester.allocate(100))==0)
586 // printf("alloc failed\n");
587 // printf("alloc res = %d\n", res);
588 // res = tester.gpuMemoryBandwidth(bandwidth, tester.size(), 10);
589 // printf("Bandwidth on %d (res %d)= %5.2f\n", tester.size(), res, bandwidth);
590 // tester.deallocate();
592 // devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
596 printf(">> Running CUSTOM memtests [%x] on %d MiB, %d iterations\n",
597 QUICK_TESTS, mem2test, 1);
600 res = do_memtest(QUICK_TESTS, mem2test, 1);
604 printf("C-RES = %d\n", res);
605 printf("C-runtime: %d ms\n", getTimeMilliseconds() - time);
612 * Only for debugging purposes, compile with:
613 * nvcc -DLINUX -D_DEBUG_=2 -L -O -Xcompiler -Wall memtestG80_core.o gmx_gpu_utils.cu -o gmx_gpu_utils_test
/* Debug-only driver: reports whether device dev_id is supported and then runs
   the quick, timed and full memory tests on it. */
615 int main( int argc, char** argv)
619 sprintf(msg, "Device #%d supported: ", dev_id);
/* NULL is passed for gpu_name, so no device name is copied out. */
620 switch (is_supported_cuda_gpu(dev_id, NULL))
622 case -1: strcat(msg, "error occured"); break;
623 case 0: strcat(msg, "no"); break;
624 case 1: strcat(msg, "yes"); break;
625 default: strcat(msg, "\nhmmm, you should not see this!");
629 printf("Doing memtest.\n");
630 printf("quick memtest result: %d\n", do_quick_memtest(dev_id));
/* the second argument is the time limit in seconds */
631 printf("timed memtest result: %d\n", do_timed_memtest(dev_id, 15));
632 printf("full memtest result: %d\n", do_full_memtest(dev_id));
641 Functions only used if this file is compiled in debug mode (_DEBUG_ > 0)
642 when the gromacs versions are not available.
643 - string trimming functions - duplicated from ~/src/gmxlib/string2.c
644 - case-insensitive string compare
/* Debug-build helper operating on the leading blanks of str. */
646 static void ltrim (char *str)
/* scan forward over leading spaces and tabs */
656 while ((tr[c] == ' ') || (tr[c] == '\t'))
/* Debug-build helper operating on the trailing blanks of str. */
663 static void rtrim (char *str)
/* scan backwards over trailing spaces and tabs; index 0 itself is never tested */
671 while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) ) {
/* Debug-build string trimming helper.
   NOTE(review): body not visible here; presumably applies ltrim and rtrim - confirm. */
677 static void trim (char *str)
/* Debug-build stand-in for the gromacs function of the same name: forwards
   directly to strncasecmp, comparing at most len characters of s1 and s2. */
683 static int gmx_strncasecmp(const char* s1, const char* s2, int len)
685 return strncasecmp(s1, s2, len);