added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / gmxlib / gpu_utils / memtestG80_core.h
blob079a67fcb5cb77debeab3f68cbf8890f22cc09d3
1 /*
2 * memtestG80_core.h
3 * Public API for core memory test functions for MemtestG80
4 * Includes functional and OO interfaces to GPU test functions.
6 * Author: Imran Haque, 2009
7 * Copyright 2009, Stanford University
9 * This file is licensed under the terms of the LGPL. Please see
10 * the COPYING file in the accompanying source distribution for
11 * full license terms.
14 #ifndef _MEMTESTG80_CORE_H_
15 #define _MEMTESTG80_CORE_H_
17 #if defined (WINDOWS) || defined (WINNV)
18 #include <windows.h>
19 inline unsigned int getTimeMilliseconds(void) {
20 return GetTickCount();
22 #include <windows.h>
23 #define SLEEPMS(x) Sleep(x)
24 #elif defined (LINUX) || defined (OSX)
25 #include <sys/time.h>
26 inline unsigned int getTimeMilliseconds(void) {
27 struct timeval tv;
28 gettimeofday(&tv,NULL);
29 return tv.tv_sec*1000 + tv.tv_usec/1000;
31 #include <unistd.h>
32 #define SLEEPMS(x) usleep(x*1000)
33 #else
34 #error Must #define LINUX, WINDOWS, WINNV, or OSX
35 #endif
37 // By default the driver will spinwait when blocked on a kernel call
38 // Use the SOFTWAIT macro to replace this with a thread sleep and occasional poll
39 // limit expresses the max time we're willing to stay in the sleep loop - default = 15sec
40 inline int _pollStatus(unsigned length=1,unsigned limit=15000) {
41 //while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(length);}
42 unsigned startTime = getTimeMilliseconds();
43 while (cudaStreamQuery(0) == cudaErrorNotReady) {
44 if ((getTimeMilliseconds() - startTime) > limit) return -1;
45 SLEEPMS(length);
47 return 0;
49 #define SOFTWAIT() if (_pollStatus() != 0) {return 0xFFFFFFFE;} // -2
50 #define SOFTWAIT_LIM(lim) if (_pollStatus(1,lim) != 0) {return 0xFFFFFFFE;} // -2
51 //#define SOFTWAIT()
52 //#define SOFTWAIT(delay) if (_pollStatus(delay) != 0) return -2;
53 //#define SOFTWAIT(delay,limit) if (_pollStatus(delay,limit) != 0) return -2;
54 //#define SOFTWAIT() while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(1);}
55 //#define SOFTWAIT(x) while (cudaStreamQuery(0) != cudaSuccess) {SLEEPMS(x);}
56 //#define SOFTWAIT()
58 // Use this macro to check for kernel errors
59 #define CHECK_LAUNCH_ERROR() if (cudaGetLastError() != cudaSuccess) {return 0xFFFFFFFF; /* -1 */}
62 typedef unsigned int uint;
64 // OO interface to MemtestG80 functions
65 class memtestState {
66 protected:
67 const uint nBlocks;
68 const uint nThreads;
69 uint loopIters;
70 uint megsToTest;
71 int lcgPeriod;
72 uint* devTestMem;
73 uint* devTempMem;
74 uint* hostTempMem;
75 bool allocated;
76 public:
77 uint initTime;
78 memtestState() : nBlocks(1024), nThreads(512), loopIters(0), megsToTest(0), allocated(false), devTestMem(NULL),devTempMem(NULL),hostTempMem(NULL), initTime(0),lcgPeriod(1024) {};
79 ~memtestState() {deallocate();}
81 uint allocate(uint mbToTest);
82 void deallocate();
83 bool isAllocated() const {return allocated;}
84 uint size() const {return megsToTest;}
85 void setLCGPeriod(int period) {lcgPeriod = period;}
86 int getLCGPeriod() const {return lcgPeriod;}
88 bool gpuMemoryBandwidth(double& bandwidth,uint mbToTest,uint iters=5);
89 bool gpuWriteConstant(const uint constant) const;
90 bool gpuVerifyConstant(uint& errorCount,const uint constant) const;
91 bool gpuShortLCG0(uint& errorCount,const uint repeats) const;
92 bool gpuShortLCG0Shmem(uint& errorCount,const uint repeats) const;
93 bool gpuMovingInversionsOnesZeros(uint& errorCount) const;
94 bool gpuWalking8BitM86(uint& errorCount,const uint shift) const;
95 bool gpuWalking8Bit(uint& errorCount,const bool ones,const uint shift) const;
96 bool gpuMovingInversionsRandom(uint& errorCount) const;
97 bool gpuWalking32Bit(uint& errorCount,const bool ones,const uint shift) const;
98 bool gpuRandomBlocks(uint& errorCount,const uint seed) const;
99 bool gpuModuloX(uint& errorCount,const uint shift,const uint pattern,const uint modulus,const uint overwriteIters) const;
102 // Utility functions
103 __host__ double gpuMemoryBandwidth(uint* src,uint* dst,uint mbToTest,uint iters);
104 __host__ void gpuWriteConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
105 __host__ uint gpuVerifyConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant,uint* blockErrorCount,uint* errorCounts);
107 __host__ void cpuWriteConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
108 __host__ uint cpuVerifyConstant(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint constant);
110 // Logic tests
111 __host__ uint gpuShortLCG0(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint repeats,const int period,uint* blockErrorCounts,uint* errorCounts);
112 __host__ uint gpuShortLCG0Shmem(const uint nBlocks,const uint nThreads,uint* base,uint N,const uint repeats,const int period,uint* blockErrorCounts,uint* errorCounts);
114 // Memtest86 Test 2: tseq=0,4
115 __host__ uint gpuMovingInversionsOnesZeros(const uint nBlocks,const uint nThreads,uint* base,uint N,uint* blockErrorCounts,uint* errorCounts);
117 // Memtest86 Test 3: tseq=1
118 __host__ uint gpuWalking8BitM86(const uint nBlocks,const uint nThreads,uint* base,uint N,uint shift,uint* blockErrorCounts,uint* errorCounts);
119 __host__ uint cpuWalking8BitM86(const uint nBlocks,const uint nThreads,uint* base,uint N,uint shift);
120 __host__ uint gpuWalking8Bit(const uint nBlocks,const uint nThreads,uint* base,uint N,bool ones,uint shift,uint* blockErrorCount,uint* errorCounts);
122 // Memtest86 Test 4: tseq=10
123 __host__ uint gpuMovingInversionsRandom(const uint nBlocks,const uint nThreads,uint* base,uint N,uint* blockErrorCounts,uint* errorCounts);
125 // Memtest86 Test 6: tseq=2
126 __host__ uint gpuWalking32Bit(const uint nBlocks,const uint nThreads,uint* base,uint N,bool ones,uint shift,uint* blockErrorCount,uint* errorCounts);
128 // Memtest86 Test 7: tseq=9
129 __host__ uint gpuRandomBlocks(const uint nBlocks,const uint nThreads,uint* base,uint N,uint seed,uint* blockErrorCount,uint* errorCounts);
131 // Memtest86 Test 8: tseq=3 (M86 uses modulus = 20)
132 __host__ uint gpuModuloX(const uint nBlocks,const uint nThreads,uint* base,const uint N,uint shift,uint pattern1,const uint modulus,const uint iters,uint* blockErrorCount,uint* errorCounts);
134 #endif