1 //===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Take a module and rewrite:
11 // 1. `malloc` -> `polly_mallocManaged`
12 // 2. `free` -> `polly_freeManaged`
13 // 3. global arrays with initializers -> global arrays that are initialized
14 // with a constructor call to
15 // `polly_mallocManaged`.
17 //===----------------------------------------------------------------------===//
19 #include "polly/CodeGen/CodeGeneration.h"
20 #include "polly/CodeGen/IslAst.h"
21 #include "polly/CodeGen/IslNodeBuilder.h"
22 #include "polly/CodeGen/PPCGCodeGeneration.h"
23 #include "polly/CodeGen/Utils.h"
24 #include "polly/DependenceInfo.h"
25 #include "polly/LinkAllPasses.h"
26 #include "polly/Options.h"
27 #include "polly/ScopDetection.h"
28 #include "polly/ScopInfo.h"
29 #include "polly/Support/SCEVValidator.h"
30 #include "llvm/Analysis/AliasAnalysis.h"
31 #include "llvm/Analysis/BasicAliasAnalysis.h"
32 #include "llvm/Analysis/CaptureTracking.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
35 #include "llvm/Analysis/TargetLibraryInfo.h"
36 #include "llvm/Analysis/TargetTransformInfo.h"
37 #include "llvm/IR/LegacyPassManager.h"
38 #include "llvm/IR/Verifier.h"
39 #include "llvm/IRReader/IRReader.h"
40 #include "llvm/Linker/Linker.h"
41 #include "llvm/Support/TargetRegistry.h"
42 #include "llvm/Support/TargetSelect.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
48 static cl::opt
<bool> RewriteAllocas(
49 "polly-acc-rewrite-allocas",
51 "Ask the managed memory rewriter to also rewrite alloca instructions"),
52 cl::Hidden
, cl::init(false), cl::ZeroOrMore
, cl::cat(PollyCategory
));
54 static cl::opt
<bool> IgnoreLinkageForGlobals(
55 "polly-acc-rewrite-ignore-linkage-for-globals",
57 "By default, we only rewrite globals with internal linkage. This flag "
58 "enables rewriting of globals regardless of linkage"),
59 cl::Hidden
, cl::init(false), cl::ZeroOrMore
, cl::cat(PollyCategory
));
61 #define DEBUG_TYPE "polly-acc-rewrite-managed-memory"
64 static llvm::Function
*getOrCreatePollyMallocManaged(Module
&M
) {
65 const char *Name
= "polly_mallocManaged";
66 Function
*F
= M
.getFunction(Name
);
68 // If F is not available, declare it.
70 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
71 PollyIRBuilder
Builder(M
.getContext());
72 // TODO: How do I get `size_t`? I assume from DataLayout?
73 FunctionType
*Ty
= FunctionType::get(Builder
.getInt8PtrTy(),
74 {Builder
.getInt64Ty()}, false);
75 F
= Function::Create(Ty
, Linkage
, Name
, &M
);
81 static llvm::Function
*getOrCreatePollyFreeManaged(Module
&M
) {
82 const char *Name
= "polly_freeManaged";
83 Function
*F
= M
.getFunction(Name
);
85 // If F is not available, declare it.
87 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
88 PollyIRBuilder
Builder(M
.getContext());
89 // TODO: How do I get `size_t`? I assume from DataLayout?
91 FunctionType::get(Builder
.getVoidTy(), {Builder
.getInt8PtrTy()}, false);
92 F
= Function::Create(Ty
, Linkage
, Name
, &M
);
98 // Expand a constant expression `Cur`, which is used at instruction `Parent`
100 // Since a constant expression can expand to multiple instructions, store all
101 // the expands into a set called `Expands`.
102 // Note that this goes inorder on the constant expression tree.
104 // will be processed with first A, then B * D, then B, then D, and then C.
105 // Though ConstantExprs are not treated as "trees" but as DAGs, since you can
106 // have something like this:
112 // For the purposes of this expansion, we expand the two occurences of D
113 // separately. Therefore, we expand the DAG into the tree:
117 // TODO: We don't _have_to do this, but this is the simplest solution.
118 // We can write a solution that keeps track of which constants have been
120 static void expandConstantExpr(ConstantExpr
*Cur
, PollyIRBuilder
&Builder
,
121 Instruction
*Parent
, int index
,
122 SmallPtrSet
<Instruction
*, 4> &Expands
) {
123 assert(Cur
&& "invalid constant expression passed");
124 Instruction
*I
= Cur
->getAsInstruction();
125 assert(I
&& "unable to convert ConstantExpr to Instruction");
127 DEBUG(dbgs() << "Expanding ConstantExpression: (" << *Cur
128 << ") in Instruction: (" << *I
<< ")\n";);
130 // Invalidate `Cur` so that no one after this point uses `Cur`. Rather,
131 // they should mutate `I`.
135 Parent
->setOperand(index
, I
);
137 // The things that `Parent` uses (its operands) should be created
139 Builder
.SetInsertPoint(Parent
);
142 for (unsigned i
= 0; i
< I
->getNumOperands(); i
++) {
143 Value
*Op
= I
->getOperand(i
);
144 assert(isa
<Constant
>(Op
) && "constant must have a constant operand");
146 if (ConstantExpr
*CExprOp
= dyn_cast
<ConstantExpr
>(Op
))
147 expandConstantExpr(CExprOp
, Builder
, I
, i
, Expands
);
151 // Edit all uses of `OldVal` to NewVal` in `Inst`. This will rewrite
152 // `ConstantExpr`s that are used in the `Inst`.
153 // Note that `replaceAllUsesWith` is insufficient for this purpose because it
154 // does not rewrite values in `ConstantExpr`s.
155 static void rewriteOldValToNew(Instruction
*Inst
, Value
*OldVal
, Value
*NewVal
,
156 PollyIRBuilder
&Builder
) {
158 // This contains a set of instructions in which OldVal must be replaced.
159 // We start with `Inst`, and we fill it up with the expanded `ConstantExpr`s
160 // from `Inst`s arguments.
161 // We need to go through this process because `replaceAllUsesWith` does not
162 // actually edit `ConstantExpr`s.
163 SmallPtrSet
<Instruction
*, 4> InstsToVisit
= {Inst
};
165 // Expand all `ConstantExpr`s and place it in `InstsToVisit`.
166 for (unsigned i
= 0; i
< Inst
->getNumOperands(); i
++) {
167 Value
*Operand
= Inst
->getOperand(i
);
168 if (ConstantExpr
*ValueConstExpr
= dyn_cast
<ConstantExpr
>(Operand
))
169 expandConstantExpr(ValueConstExpr
, Builder
, Inst
, i
, InstsToVisit
);
172 // Now visit each instruction and use `replaceUsesOfWith`. We know that
173 // will work because `I` cannot have any `ConstantExpr` within it.
174 for (Instruction
*I
: InstsToVisit
)
175 I
->replaceUsesOfWith(OldVal
, NewVal
);
178 // Given a value `Current`, return all Instructions that may contain `Current`
180 // We need this auxiliary function, because if we have a
181 // `Constant` that is a user of `V`, we need to recurse into the
182 // `Constant`s uses to gather the root instruciton.
183 static void getInstructionUsersOfValue(Value
*V
,
184 SmallVector
<Instruction
*, 4> &Owners
) {
185 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
188 // Anything that is a `User` must be a constant or an instruction.
189 auto *C
= cast
<Constant
>(V
);
190 for (Use
&CUse
: C
->uses())
191 getInstructionUsersOfValue(CUse
.getUser(), Owners
);
196 replaceGlobalArray(Module
&M
, const DataLayout
&DL
, GlobalVariable
&Array
,
197 SmallPtrSet
<GlobalVariable
*, 4> &ReplacedGlobals
) {
198 // We only want arrays.
199 ArrayType
*ArrayTy
= dyn_cast
<ArrayType
>(Array
.getType()->getElementType());
202 Type
*ElemTy
= ArrayTy
->getElementType();
203 PointerType
*ElemPtrTy
= ElemTy
->getPointerTo();
205 // We only wish to replace arrays that are visible in the module they
206 // inhabit. Otherwise, our type edit from [T] to T* would be illegal across
208 const bool OnlyVisibleInsideModule
= Array
.hasPrivateLinkage() ||
209 Array
.hasInternalLinkage() ||
210 IgnoreLinkageForGlobals
;
211 if (!OnlyVisibleInsideModule
) {
212 DEBUG(dbgs() << "Not rewriting (" << Array
213 << ") to managed memory "
214 "because it could be visible externally. To force rewrite, "
215 "use -polly-acc-rewrite-ignore-linkage-for-globals.\n");
219 if (!Array
.hasInitializer() ||
220 !isa
<ConstantAggregateZero
>(Array
.getInitializer())) {
221 DEBUG(dbgs() << "Not rewriting (" << Array
222 << ") to managed memory "
223 "because it has an initializer which is "
224 "not a zeroinitializer.\n");
228 // At this point, we have committed to replacing this array.
229 ReplacedGlobals
.insert(&Array
);
231 std::string NewName
= Array
.getName();
233 GlobalVariable
*ReplacementToArr
=
234 cast
<GlobalVariable
>(M
.getOrInsertGlobal(NewName
, ElemPtrTy
));
235 ReplacementToArr
->setInitializer(ConstantPointerNull::get(ElemPtrTy
));
237 Function
*PollyMallocManaged
= getOrCreatePollyMallocManaged(M
);
238 std::string FnName
= Array
.getName();
239 FnName
+= ".constructor";
240 PollyIRBuilder
Builder(M
.getContext());
241 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
242 const GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
243 Function
*F
= Function::Create(Ty
, Linkage
, FnName
, &M
);
244 BasicBlock
*Start
= BasicBlock::Create(M
.getContext(), "entry", F
);
245 Builder
.SetInsertPoint(Start
);
247 int ArraySizeInt
= DL
.getTypeAllocSizeInBits(ArrayTy
) / 8;
248 Value
*ArraySize
= Builder
.getInt64(ArraySizeInt
);
249 ArraySize
->setName("array.size");
251 Value
*AllocatedMemRaw
=
252 Builder
.CreateCall(PollyMallocManaged
, {ArraySize
}, "mem.raw");
253 Value
*AllocatedMemTyped
=
254 Builder
.CreatePointerCast(AllocatedMemRaw
, ElemPtrTy
, "mem.typed");
255 Builder
.CreateStore(AllocatedMemTyped
, ReplacementToArr
);
256 Builder
.CreateRetVoid();
258 const int Priority
= 0;
259 appendToGlobalCtors(M
, F
, Priority
, ReplacementToArr
);
261 SmallVector
<Instruction
*, 4> ArrayUserInstructions
;
262 // Get all instructions that use array. We need to do this weird thing
263 // because `Constant`s that contain this array neeed to be expanded into
264 // instructions so that we can replace their parameters. `Constant`s cannot
265 // be edited easily, so we choose to convert all `Constant`s to
266 // `Instruction`s and handle all of the uses of `Array` uniformly.
267 for (Use
&ArrayUse
: Array
.uses())
268 getInstructionUsersOfValue(ArrayUse
.getUser(), ArrayUserInstructions
);
270 for (Instruction
*UserOfArrayInst
: ArrayUserInstructions
) {
272 Builder
.SetInsertPoint(UserOfArrayInst
);
274 Value
*ArrPtrLoaded
= Builder
.CreateLoad(ReplacementToArr
, "arrptr.load");
276 Value
*ArrPtrLoadedBitcasted
= Builder
.CreateBitCast(
277 ArrPtrLoaded
, ArrayTy
->getPointerTo(), "arrptr.bitcast");
278 rewriteOldValToNew(UserOfArrayInst
, &Array
, ArrPtrLoadedBitcasted
, Builder
);
282 // We return all `allocas` that may need to be converted to a call to
283 // cudaMallocManaged.
284 static void getAllocasToBeManaged(Function
&F
,
285 SmallSet
<AllocaInst
*, 4> &Allocas
) {
286 for (BasicBlock
&BB
: F
) {
287 for (Instruction
&I
: BB
) {
288 auto *Alloca
= dyn_cast
<AllocaInst
>(&I
);
291 DEBUG(dbgs() << "Checking if (" << *Alloca
<< ") may be captured: ");
293 if (PointerMayBeCaptured(Alloca
, /* ReturnCaptures */ false,
294 /* StoreCaptures */ true)) {
295 Allocas
.insert(Alloca
);
296 DEBUG(dbgs() << "YES (captured).\n");
298 DEBUG(dbgs() << "NO (not captured).\n");
304 static void rewriteAllocaAsManagedMemory(AllocaInst
*Alloca
,
305 const DataLayout
&DL
) {
306 DEBUG(dbgs() << "rewriting: (" << *Alloca
<< ") to managed mem.\n");
307 Module
*M
= Alloca
->getModule();
308 assert(M
&& "Alloca does not have a module");
310 PollyIRBuilder
Builder(M
->getContext());
311 Builder
.SetInsertPoint(Alloca
);
313 Value
*MallocManagedFn
= getOrCreatePollyMallocManaged(*Alloca
->getModule());
314 const int Size
= DL
.getTypeAllocSize(Alloca
->getType()->getElementType());
315 Value
*SizeVal
= Builder
.getInt64(Size
);
316 Value
*RawManagedMem
= Builder
.CreateCall(MallocManagedFn
, {SizeVal
});
317 Value
*Bitcasted
= Builder
.CreateBitCast(RawManagedMem
, Alloca
->getType());
319 Function
*F
= Alloca
->getFunction();
320 assert(F
&& "Alloca has invalid function");
322 Bitcasted
->takeName(Alloca
);
323 Alloca
->replaceAllUsesWith(Bitcasted
);
324 Alloca
->eraseFromParent();
326 for (BasicBlock
&BB
: *F
) {
327 ReturnInst
*Return
= dyn_cast
<ReturnInst
>(BB
.getTerminator());
330 Builder
.SetInsertPoint(Return
);
332 Value
*FreeManagedFn
= getOrCreatePollyFreeManaged(*M
);
333 Builder
.CreateCall(FreeManagedFn
, {RawManagedMem
});
337 // Replace all uses of `Old` with `New`, even inside `ConstantExpr`.
339 // `replaceAllUsesWith` does replace values in `ConstantExpr`. This function
340 // actually does replace it in `ConstantExpr`. The caveat is that if there is
341 // a use that is *outside* a function (say, at global declarations), we fail.
342 // So, this is meant to be used on values which we know will only be used
345 // This process works by looking through the uses of `Old`. If it finds a
346 // `ConstantExpr`, it recursively looks for the owning instruction.
347 // Then, it expands all the `ConstantExpr` to instructions and replaces
348 // `Old` with `New` in the expanded instructions.
349 static void replaceAllUsesAndConstantUses(Value
*Old
, Value
*New
,
350 PollyIRBuilder
&Builder
) {
351 SmallVector
<Instruction
*, 4> UserInstructions
;
352 // Get all instructions that use array. We need to do this weird thing
353 // because `Constant`s that contain this array neeed to be expanded into
354 // instructions so that we can replace their parameters. `Constant`s cannot
355 // be edited easily, so we choose to convert all `Constant`s to
356 // `Instruction`s and handle all of the uses of `Array` uniformly.
357 for (Use
&ArrayUse
: Old
->uses())
358 getInstructionUsersOfValue(ArrayUse
.getUser(), UserInstructions
);
360 for (Instruction
*I
: UserInstructions
)
361 rewriteOldValToNew(I
, Old
, New
, Builder
);
364 class ManagedMemoryRewritePass
: public ModulePass
{
367 GPUArch Architecture
;
370 ManagedMemoryRewritePass() : ModulePass(ID
) {}
371 virtual bool runOnModule(Module
&M
) {
372 const DataLayout
&DL
= M
.getDataLayout();
374 Function
*Malloc
= M
.getFunction("malloc");
377 PollyIRBuilder
Builder(M
.getContext());
378 Function
*PollyMallocManaged
= getOrCreatePollyMallocManaged(M
);
379 assert(PollyMallocManaged
&& "unable to create polly_mallocManaged");
381 replaceAllUsesAndConstantUses(Malloc
, PollyMallocManaged
, Builder
);
382 Malloc
->eraseFromParent();
385 Function
*Free
= M
.getFunction("free");
388 PollyIRBuilder
Builder(M
.getContext());
389 Function
*PollyFreeManaged
= getOrCreatePollyFreeManaged(M
);
390 assert(PollyFreeManaged
&& "unable to create polly_freeManaged");
392 replaceAllUsesAndConstantUses(Free
, PollyFreeManaged
, Builder
);
393 Free
->eraseFromParent();
396 SmallPtrSet
<GlobalVariable
*, 4> GlobalsToErase
;
397 for (GlobalVariable
&Global
: M
.globals())
398 replaceGlobalArray(M
, DL
, Global
, GlobalsToErase
);
399 for (GlobalVariable
*G
: GlobalsToErase
)
400 G
->eraseFromParent();
402 // Rewrite allocas to cudaMallocs if we are asked to do so.
403 if (RewriteAllocas
) {
404 SmallSet
<AllocaInst
*, 4> AllocasToBeManaged
;
405 for (Function
&F
: M
.functions())
406 getAllocasToBeManaged(F
, AllocasToBeManaged
);
408 for (AllocaInst
*Alloca
: AllocasToBeManaged
)
409 rewriteAllocaAsManagedMemory(Alloca
, DL
);
417 char ManagedMemoryRewritePass::ID
= 42;
419 Pass
*polly::createManagedMemoryRewritePassPass(GPUArch Arch
,
420 GPURuntime Runtime
) {
421 ManagedMemoryRewritePass
*pass
= new ManagedMemoryRewritePass();
422 pass
->Runtime
= Runtime
;
423 pass
->Architecture
= Arch
;
427 INITIALIZE_PASS_BEGIN(
428 ManagedMemoryRewritePass
, "polly-acc-rewrite-managed-memory",
429 "Polly - Rewrite all allocations in heap & data section to managed memory",
431 INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration
);
432 INITIALIZE_PASS_DEPENDENCY(DependenceInfo
);
433 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
434 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
435 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
436 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
437 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
439 ManagedMemoryRewritePass
, "polly-acc-rewrite-managed-memory",
440 "Polly - Rewrite all allocations in heap & data section to managed memory",