3 * Public API for core memory test functions for MemtestG80
4 * Includes functional and OO interfaces to GPU test functions.
6 * Author: Imran Haque, 2009
7 * Copyright 2009, Stanford University
9 * This file is licensed under the terms of the LGPL. Please see
10 * the COPYING file in the accompanying source distribution for
14 #ifndef _MEMTESTG80_CORE_H_
15 #define _MEMTESTG80_CORE_H_
17 #if defined (WINDOWS) || defined (WINNV)
19 inline unsigned int getTimeMilliseconds(void) {
20 return GetTickCount();
23 #define SLEEPMS(x) Sleep(x)
24 #elif defined (LINUX) || defined (OSX)
26 inline unsigned int getTimeMilliseconds(void) {
28 gettimeofday(&tv
,NULL
);
29 return tv
.tv_sec
*1000 + tv
.tv_usec
/1000;
32 #define SLEEPMS(x) usleep(x*1000)
34 #error Must #define LINUX, WINDOWS, WINNV, or OSX
37 // By default the driver will spinwait when blocked on a kernel call
38 // Use the SOFTWAIT macro to replace this with a thread sleep and occasional poll
39 // limit expresses the max time we're willing to stay in the sleep loop - default = 15sec
40 inline int _pollStatus(unsigned length
=1,unsigned limit
=15000) {
41 //while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(length);}
42 unsigned startTime
= getTimeMilliseconds();
43 while (cudaStreamQuery(0) == cudaErrorNotReady
) {
44 if ((getTimeMilliseconds() - startTime
) > limit
) return -1;
49 #define SOFTWAIT() if (_pollStatus() != 0) {return 0xFFFFFFFE;} // -2
50 #define SOFTWAIT_LIM(lim) if (_pollStatus(1,lim) != 0) {return 0xFFFFFFFE;} // -2
52 //#define SOFTWAIT(delay) if (_pollStatus(delay) != 0) return -2;
53 //#define SOFTWAIT(delay,limit) if (_pollStatus(delay,limit) != 0) return -2;
54 //#define SOFTWAIT() while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(1);}
55 //#define SOFTWAIT(x) while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(x);}
58 // Use this macro to check for kernel errors
59 #define CHECK_LAUNCH_ERROR() if (cudaGetLastError() != cudaSuccess) {return 0xFFFFFFFF; /* -1 */}
62 typedef unsigned int uint
;
64 // OO interface to MemtestG80 functions
78 memtestState() : nBlocks(1024), nThreads(512), loopIters(0), megsToTest(0), allocated(false), devTestMem(NULL
),devTempMem(NULL
),hostTempMem(NULL
), initTime(0),lcgPeriod(1024) {};
79 ~memtestState() {deallocate();}
81 uint
allocate(uint mbToTest
);
83 bool isAllocated() const {return allocated
;}
84 uint
size() const {return megsToTest
;}
85 void setLCGPeriod(int period
) {lcgPeriod
= period
;}
86 int getLCGPeriod() const {return lcgPeriod
;}
88 bool gpuMemoryBandwidth(double& bandwidth
,uint mbToTest
,uint iters
=5);
89 bool gpuWriteConstant(const uint constant
) const;
90 bool gpuVerifyConstant(uint
& errorCount
,const uint constant
) const;
91 bool gpuShortLCG0(uint
& errorCount
,const uint repeats
) const;
92 bool gpuShortLCG0Shmem(uint
& errorCount
,const uint repeats
) const;
93 bool gpuMovingInversionsOnesZeros(uint
& errorCount
) const;
94 bool gpuWalking8BitM86(uint
& errorCount
,const uint shift
) const;
95 bool gpuWalking8Bit(uint
& errorCount
,const bool ones
,const uint shift
) const;
96 bool gpuMovingInversionsRandom(uint
& errorCount
) const;
97 bool gpuWalking32Bit(uint
& errorCount
,const bool ones
,const uint shift
) const;
98 bool gpuRandomBlocks(uint
& errorCount
,const uint seed
) const;
99 bool gpuModuloX(uint
& errorCount
,const uint shift
,const uint pattern
,const uint modulus
,const uint overwriteIters
) const;
103 __host__
double gpuMemoryBandwidth(uint
* src
,uint
* dst
,uint mbToTest
,uint iters
);
104 __host__
void gpuWriteConstant(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint constant
);
105 __host__ uint
gpuVerifyConstant(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint constant
,uint
* blockErrorCount
,uint
* errorCounts
);
107 __host__
void cpuWriteConstant(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint constant
);
108 __host__ uint
cpuVerifyConstant(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint constant
);
111 __host__ uint
gpuShortLCG0(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint repeats
,const int period
,uint
* blockErrorCounts
,uint
* errorCounts
);
112 __host__ uint
gpuShortLCG0Shmem(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,const uint repeats
,const int period
,uint
* blockErrorCounts
,uint
* errorCounts
);
114 // Memtest86 Test 2: tseq=0,4
115 __host__ uint
gpuMovingInversionsOnesZeros(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,uint
* blockErrorCounts
,uint
* errorCounts
);
117 // Memtest86 Test 3: tseq=1
118 __host__ uint
gpuWalking8BitM86(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,uint shift
,uint
* blockErrorCounts
,uint
* errorCounts
);
119 __host__ uint
cpuWalking8BitM86(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,uint shift
);
120 __host__ uint
gpuWalking8Bit(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,bool ones
,uint shift
,uint
* blockErrorCount
,uint
* errorCounts
);
122 // Memtest86 Test 4: tseq=10
123 __host__ uint
gpuMovingInversionsRandom(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,uint
* blockErrorCounts
,uint
* errorCounts
);
125 // Memtest86 Test 6: tseq=2
126 __host__ uint
gpuWalking32Bit(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,bool ones
,uint shift
,uint
* blockErrorCount
,uint
* errorCounts
);
128 // Memtest86 Test 7: tseq=9
129 __host__ uint
gpuRandomBlocks(const uint nBlocks
,const uint nThreads
,uint
* base
,uint N
,uint seed
,uint
* blockErrorCount
,uint
* errorCounts
);
131 // Memtest86 Test 8: tseq=3 (M86 uses modulus = 20)
132 __host__ uint
gpuModuloX(const uint nBlocks
,const uint nThreads
,uint
* base
,const uint N
,uint shift
,uint pattern1
,const uint modulus
,const uint iters
,uint
* blockErrorCount
,uint
* errorCounts
);