From 4188beededece490681806211bf28e8f214f56c6 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 25 Jul 2016 12:47:33 +0000 Subject: [PATCH] GPGPU: Complete code to allocate and free device arrays At the beginning of each SCoP, we allocate device arrays for all arrays used on the GPU and we free such arrays after the SCoP has been executed. git-svn-id: https://llvm.org/svn/llvm-project/polly/trunk@276635 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PPCGCodeGeneration.cpp | 49 ++++++++++++++++++++++++++++++++++---- test/GPGPU/double-parallel-loop.ll | 3 ++- tools/GPURuntime/GPUJIT.c | 26 ++++++++++++++++++++ tools/GPURuntime/GPUJIT.h | 5 +++- 4 files changed, 77 insertions(+), 6 deletions(-) diff --git a/lib/CodeGen/PPCGCodeGeneration.cpp b/lib/CodeGen/PPCGCodeGeneration.cpp index 012ae34d..872b2f60 100644 --- a/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/lib/CodeGen/PPCGCodeGeneration.cpp @@ -148,6 +148,9 @@ private: /// more. std::vector<Value *> LocalArrays; + /// A list of device arrays that have been allocated. + std::vector<Value *> AllocatedDevArrays; + /// The current GPU context. Value *GPUContext; @@ -266,8 +269,12 @@ private: /// dump its IR to stderr. void finalizeKernelFunction(); + /// Create code that allocates memory to store arrays on device. void allocateDeviceArrays(); + /// Free all allocated device arrays. + void freeDeviceArrays(); + /// Create a call to initialize the GPU context. /// /// @returns A pointer to the newly initialized context. @@ -278,7 +285,17 @@ private: /// @param Context A pointer to an initialized GPU context. void createCallFreeContext(Value *Context); + /// Create a call to allocate memory on the device. + /// + /// @param Size The size of memory to allocate. + /// + /// @returns A pointer that identifies this allocation. Value *createCallAllocateMemoryForDevice(Value *Size); + + /// Create a call to free a device array. + /// + /// @param Array The device array to free. 
+ void createCallFreeDeviceMemory(Value *Array); }; void GPUNodeBuilder::initializeAfterRTH() { @@ -287,6 +304,7 @@ void GPUNodeBuilder::initializeAfterRTH() { } void GPUNodeBuilder::finalize() { + freeDeviceArrays(); createCallFreeContext(GPUContext); IslNodeBuilder::finalize(); } @@ -296,8 +314,8 @@ void GPUNodeBuilder::allocateDeviceArrays() { for (int i = 0; i < Prog->n_array; ++i) { gpu_array_info *Array = &Prog->array[i]; - std::string DevPtrName("p_devptr_"); - DevPtrName.append(Array->name); + std::string DevArrayName("p_dev_array_"); + DevArrayName.append(Array->name); Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); @@ -315,13 +333,36 @@ void GPUNodeBuilder::allocateDeviceArrays() { ArraySize = Builder.CreateMul(ArraySize, NumElements); } - Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize); - DevPtr->setName(DevPtrName); + Value *DevArray = createCallAllocateMemoryForDevice(ArraySize); + DevArray->setName(DevArrayName); + AllocatedDevArrays.push_back(DevArray); } isl_ast_build_free(Build); } +void GPUNodeBuilder::freeDeviceArrays() { + for (auto &Array : AllocatedDevArrays) + createCallFreeDeviceMemory(Array); +} + +void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) { + const char *Name = "polly_freeDeviceMemory"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. 
+ if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector Args; + Args.push_back(Builder.getInt8PtrTy()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {Array}); +} + Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) { const char *Name = "polly_allocateMemoryForDevice"; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); diff --git a/test/GPGPU/double-parallel-loop.ll b/test/GPGPU/double-parallel-loop.ll index 33153aea..1563f0c2 100644 --- a/test/GPGPU/double-parallel-loop.ll +++ b/test/GPGPU/double-parallel-loop.ll @@ -93,7 +93,8 @@ ; IR: polly.start: ; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext() -; IR-NEXT: %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) +; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) +; IR-NEXT: call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef_A) ; IR-NEXT: call void @polly_freeContext(i8* [[GPUContext]]) ; IR-NEXT: br label %polly.exiting diff --git a/tools/GPURuntime/GPUJIT.c b/tools/GPURuntime/GPUJIT.c index ec971580..41801fa1 100644 --- a/tools/GPURuntime/GPUJIT.c +++ b/tools/GPURuntime/GPUJIT.c @@ -339,6 +339,32 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, debug_print("CUDA kernel launched.\n"); } +void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation) { + dump_function(); + CuMemFreeFcnPtr((CUdeviceptr)Allocation->Cuda); + free(Allocation); +} + +PollyGPUDevicePtr *polly_allocateMemoryForDevice(long MemSize) { + dump_function(); + + PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr)); + + if (DevData == 0) { + fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n"); + exit(-1); + } + + CUresult Res = CuMemAllocFcnPtr(&(DevData->Cuda), MemSize); + + if (Res != CUDA_SUCCESS) { + fprintf(stdout, "Allocate memory for GPU device 
memory pointer failed.\n"); + exit(-1); + } + + return DevData; +} + void polly_freeContext(PollyGPUContext *Context) { dump_function(); diff --git a/tools/GPURuntime/GPUJIT.h b/tools/GPURuntime/GPUJIT.h index 0ded86b2..516475d1 100644 --- a/tools/GPURuntime/GPUJIT.h +++ b/tools/GPURuntime/GPUJIT.h @@ -47,7 +47,7 @@ * PollyGPUModule *Module; * PollyGPUFunction *Kernel; * PollyGPUContext *Context; - * PollyGPUDevicePtr *PtrDevData; + * PollyGPUDevicePtr *DevArray; * int *HostData; * int MemSize; * int BlockWidth = 16; @@ -57,11 +57,13 @@ * * MemSize = 256*64*sizeof(int); * Context = polly_initContext(); + * DevArray = polly_allocateMemoryForDevice(MemSize); * polly_getPTXModule(KernelString, &Module); * polly_getPTXKernelEntry(Entry, Module, &Kernel); * polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData); * polly_launchKernel(Kernel, GridWidth, GridHeight); * polly_copyFromDeviceToHost(HostData, DevData, MemSize); + * polly_freeDeviceMemory(DevArray); * polly_freeContext(Context); * } * @@ -84,5 +86,6 @@ void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth, int BlockHeight, PollyGPUDevicePtr *DevData); void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, int GridHeight); +void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation); void polly_freeContext(PollyGPUContext *Context); #endif /* GPUJIT_H_ */ -- 2.11.4.GIT