src/gmxlib/gpu_utils/memtestG80_core.h

   1 /*
   2  * memtestG80_core.h
   3  * Public API for core memory test functions for MemtestG80
   4  * Includes functional and OO interfaces to GPU test functions.
   5  *
   6  * Author: Imran Haque, 2009
   7  * Copyright 2009, Stanford University
   8  *
   9  * This file is licensed under the terms of the LGPL. Please see
  10  * the COPYING file in the accompanying source distribution for
  11  * full license terms.
  12  *
  13  */
  14 #ifndef _MEMTESTG80_CORE_H_
  15 #define _MEMTESTG80_CORE_H_
  16
  17 #if defined (WINDOWS) || defined (WINNV)
  18     #include <windows.h>
  19     inline unsigned int getTimeMilliseconds(void) {
  20         return GetTickCount();
  21     }
  22     #include <windows.h>
  23         #define SLEEPMS(x) Sleep(x)
  24 #elif defined (LINUX) || defined (OSX)
  25     #include <sys/time.h>
  26     inline unsigned int getTimeMilliseconds(void) {
  27         struct timeval tv;
  28         gettimeofday(&tv,NULL);
  29         return tv.tv_sec*1000 + tv.tv_usec/1000;
  30     }
  31     #include <unistd.h>
  32     #define SLEEPMS(x) usleep(x*1000)
  33 #else
  34     #error Must #define LINUX, WINDOWS, WINNV, or OSX
  35 #endif
  36
  37 // By default the driver will spinwait when blocked on a kernel call
  38 // Use the SOFTWAIT macro to replace this with a thread sleep and occasional poll
  39 // limit expresses the max time we're willing to stay in the sleep loop - default = 15sec
  40 inline int _pollStatus(unsigned length=1,unsigned limit=15000) {
  41     //while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(length);}
  42     unsigned startTime = getTimeMilliseconds();
  43     while (cudaStreamQuery(0) == cudaErrorNotReady) {
  44         if ((getTimeMilliseconds() - startTime) > limit) return -1;
  45         SLEEPMS(length);
  46     }
  47     return 0;
  48 }
  49 #define SOFTWAIT() if (_pollStatus() != 0) {return 0xFFFFFFFE;} // -2
  50 #define SOFTWAIT_LIM(lim) if (_pollStatus(1,lim) != 0) {return 0xFFFFFFFE;} // -2
  51 //#define SOFTWAIT()
  52 //#define SOFTWAIT(delay) if (_pollStatus(delay) != 0) return -2;
  53 //#define SOFTWAIT(delay,limit) if (_pollStatus(delay,limit) != 0) return -2;
  54 //#define SOFTWAIT() while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(1);}
  55 //#define SOFTWAIT(x) while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(x);}
  56 //#define SOFTWAIT()
  57
  58 // Use this macro to check for kernel errors
  59 #define CHECK_LAUNCH_ERROR() if (cudaGetLastError() != cudaSuccess) {return 0xFFFFFFFF; /* -1 */}
  60
  61
  62 typedef unsigned int uint;
  63
  64 // OO interface to MemtestG80 functions
  65 class memtestState {
  66 protected:
  67         const uint nBlocks;
  68         const uint nThreads;
  69     uint loopIters;
  70         uint megsToTest;
  71     int lcgPeriod;
  72         uint* devTestMem;
  73         uint* devTempMem;
  74         uint* hostTempMem;
  75         bool allocated;
  76 public:
  77     uint initTime;
  78     memtestState() : nBlocks(1024), nThreads(512), loopIters(0), megsToTest(0), allocated(false), devTestMem(NULL),devTempMem(NULL),hostTempMem(NULL), initTime(0),lcgPeriod(1024) {};
  79     ~memtestState() {deallocate();}
  80
  81         uint allocate(uint mbToTest);
  82         void deallocate();
  83         bool isAllocated() const {return allocated;}
  84         uint size() const {return megsToTest;}
  85     void setLCGPeriod(int period) {lcgPeriod = period;}
  86     int getLCGPeriod() const {return lcgPeriod;}
  87
  88     bool gpuMemoryBandwidth(double& bandwidth,uint mbToTest,uint iters=5);
  89         bool gpuWriteConstant(const uint constant) const;
  90         bool gpuVerifyConstant(uint& errorCount,const uint constant) const;
  91         bool gpuShortLCG0(uint& errorCount,const uint repeats) const;
  92         bool gpuShortLCG0Shmem(uint& errorCount,const uint repeats) const;
  93         bool gpuMovingInversionsOnesZeros(uint& errorCount) const;
  94         bool gpuWalking8BitM86(uint& errorCount,const uint shift) const;
  95         bool gpuWalking8Bit(uint& errorCount,const bool ones,const uint shift) const;
  96         bool gpuMovingInversionsRandom(uint& errorCount) const;
  97         bool gpuWalking32Bit(uint& errorCount,const bool ones,const uint shift) const;
  98         bool gpuRandomBlocks(uint& errorCount,const uint seed) const;
  99         bool gpuModuloX(uint& errorCount,const uint shift,const uint pattern,const uint modulus,const uint overwriteIters) const;
 100 };
 101
 102 // Utility functions
 103 __host__ double gpuMemoryBandwidth(uint* src,uint* dst,uint mbToTest,uint iters);
 104 __host__ void gpuWriteConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
 105 __host__ uint gpuVerifyConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant,uint* blockErrorCount,uint* errorCounts);
 106
 107 __host__ void cpuWriteConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
 108 __host__ uint cpuVerifyConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
 109
 110 // Logic tests
 111 __host__ uint gpuShortLCG0(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint repeats,const int period,uint* blockErrorCounts,uint* errorCounts);
 112 __host__ uint gpuShortLCG0Shmem(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint repeats,const int period,uint* blockErrorCounts,uint* errorCounts);
 113
 114 // Memtest86 Test 2: tseq=0,4
 115 __host__ uint gpuMovingInversionsOnesZeros(const uint nBlocks,const uint nThreads,uint* base,uint N,uint* blockErrorCounts,uint* errorCounts);
 116
 117 // Memtest86 Test 3: tseq=1
 118 __host__ uint gpuWalking8BitM86(const uint nBlocks,const uint nThreads,uint* base,uint N,uint shift,uint* blockErrorCounts,uint* errorCounts);
 119 __host__ uint cpuWalking8BitM86(const uint nBlocks,const uint nThreads,uint* base,uint N,uint shift);
 120 __host__ uint gpuWalking8Bit(const uint nBlocks,const uint nThreads,uint* base,uint N,bool ones,uint shift,uint* blockErrorCount,uint* errorCounts);
 121
 122 // Memtest86 Test 4: tseq=10
 123 __host__ uint gpuMovingInversionsRandom(const uint nBlocks,const uint nThreads,uint* base,uint N,uint* blockErrorCounts,uint* errorCounts);
 124
 125 // Memtest86 Test 6: tseq=2
 126 __host__ uint gpuWalking32Bit(const uint nBlocks,const uint nThreads,uint* base,uint N,bool ones,uint shift,uint* blockErrorCount,uint* errorCounts);
 127 //
 128 // Memtest86 Test 7: tseq=9
 129 __host__ uint gpuRandomBlocks(const uint nBlocks,const uint nThreads,uint* base,uint N,uint seed,uint* blockErrorCount,uint* errorCounts);
 130
 131 // Memtest86 Test 8: tseq=3 (M86 uses modulus = 20)
 132 __host__ uint gpuModuloX(const uint nBlocks,const uint nThreads,uint* base,const uint N,uint shift,uint pattern1,const uint modulus,const uint iters,uint* blockErrorCount,uint* errorCounts);
 133
 134 #endif