Add comment for querying --libdir. NFC.
[polly-mirror.git] / lib / CodeGen / LoopGenerators.cpp
blob149cf2cef734bee9ad705e12dee7a7184397ae43
1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and parallel loops as LLVM-IR.
12 //===----------------------------------------------------------------------===//
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/ScopDetection.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23 using namespace llvm;
24 using namespace polly;
26 static cl::opt<int>
27 PollyNumThreads("polly-num-threads",
28 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
29 cl::init(0));
31 // We generate a loop of either of the following structures:
33 // BeforeBB BeforeBB
34 // | |
35 // v v
36 // GuardBB PreHeaderBB
37 // / | | _____
38 // __ PreHeaderBB | v \/ |
39 // / \ / | HeaderBB latch
40 // latch HeaderBB | |\ |
41 // \ / \ / | \------/
42 // < \ / |
43 // \ / v
44 // ExitBB ExitBB
46 // depending on whether or not we know that it is executed at least once. If
47 // not, GuardBB checks if the loop is executed at least once. If this is the
48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
49 // contains the loop iv 'polly.indvar', the incremented loop iv
50 // 'polly.indvar_next' as well as the condition to check if we execute another
51 // iteration of the loop. After the loop has finished, we branch to ExitBB.
52 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
53 PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
54 DominatorTree &DT, BasicBlock *&ExitBB,
55 ICmpInst::Predicate Predicate,
56 ScopAnnotator *Annotator, bool Parallel,
57 bool UseGuard) {
58 Function *F = Builder.GetInsertBlock()->getParent();
59 LLVMContext &Context = F->getContext();
61 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
62 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
63 assert(LoopIVType && "UB is not integer?");
65 BasicBlock *BeforeBB = Builder.GetInsertBlock();
66 BasicBlock *GuardBB =
67 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
68 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
69 BasicBlock *PreHeaderBB =
70 BasicBlock::Create(Context, "polly.loop_preheader", F);
72 // Update LoopInfo
73 Loop *OuterLoop = LI.getLoopFor(BeforeBB);
74 Loop *NewLoop = new Loop();
76 if (OuterLoop)
77 OuterLoop->addChildLoop(NewLoop);
78 else
79 LI.addTopLevelLoop(NewLoop);
81 if (OuterLoop) {
82 if (GuardBB)
83 OuterLoop->addBasicBlockToLoop(GuardBB, LI);
84 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
87 NewLoop->addBasicBlockToLoop(HeaderBB, LI);
89 // Notify the annotator (if present) that we have a new loop, but only
90 // after the header block is set.
91 if (Annotator)
92 Annotator->pushLoop(NewLoop, Parallel);
94 // ExitBB
95 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
96 ExitBB->setName("polly.loop_exit");
98 // BeforeBB
99 if (GuardBB) {
100 BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
101 DT.addNewBlock(GuardBB, BeforeBB);
103 // GuardBB
104 Builder.SetInsertPoint(GuardBB);
105 Value *LoopGuard;
106 LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
107 LoopGuard->setName("polly.loop_guard");
108 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
109 DT.addNewBlock(PreHeaderBB, GuardBB);
110 } else {
111 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
112 DT.addNewBlock(PreHeaderBB, BeforeBB);
115 // PreHeaderBB
116 Builder.SetInsertPoint(PreHeaderBB);
117 Builder.CreateBr(HeaderBB);
119 // HeaderBB
120 DT.addNewBlock(HeaderBB, PreHeaderBB);
121 Builder.SetInsertPoint(HeaderBB);
122 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
123 IV->addIncoming(LB, PreHeaderBB);
124 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
125 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
126 Value *LoopCondition;
127 UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
128 LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
129 LoopCondition->setName("polly.loop_cond");
131 // Create the loop latch and annotate it as such.
132 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
133 if (Annotator)
134 Annotator->annotateLoopLatch(B, NewLoop, Parallel);
136 IV->addIncoming(IncrementedIV, HeaderBB);
137 if (GuardBB)
138 DT.changeImmediateDominator(ExitBB, GuardBB);
139 else
140 DT.changeImmediateDominator(ExitBB, HeaderBB);
142 // The loop body should be added here.
143 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
144 return IV;
147 Value *ParallelLoopGenerator::createParallelLoop(
148 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
149 ValueMapT &Map, BasicBlock::iterator *LoopBody) {
150 Function *SubFn;
152 AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
153 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
154 Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
155 *LoopBody = Builder.GetInsertPoint();
156 Builder.SetInsertPoint(&*BeforeLoop);
158 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
159 "polly.par.userContext");
161 // Add one as the upper bound provided by openmp is a < comparison
162 // whereas the codegenForSequential function creates a <= comparison.
163 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
165 // Tell the runtime we start a parallel loop
166 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
167 Builder.CreateCall(SubFn, SubFnParam);
168 createCallJoinThreads();
170 // Mark the end of the lifetime for the parameter struct.
171 Type *Ty = Struct->getType();
172 ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty));
173 Builder.CreateLifetimeEnd(Struct, SizeOf);
175 return IV;
178 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
179 Value *SubFnParam, Value *LB,
180 Value *UB, Value *Stride) {
181 const std::string Name = "GOMP_parallel_loop_runtime_start";
183 Function *F = M->getFunction(Name);
185 // If F is not available, declare it.
186 if (!F) {
187 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
189 Type *Params[] = {PointerType::getUnqual(FunctionType::get(
190 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
191 Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongType,
192 LongType, LongType};
194 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
195 F = Function::Create(Ty, Linkage, Name, M);
198 Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
199 Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
201 Builder.CreateCall(F, Args);
204 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
205 Value *UBPtr) {
206 const std::string Name = "GOMP_loop_runtime_next";
208 Function *F = M->getFunction(Name);
210 // If F is not available, declare it.
211 if (!F) {
212 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
213 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
214 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
215 F = Function::Create(Ty, Linkage, Name, M);
218 Value *Args[] = {LBPtr, UBPtr};
219 Value *Return = Builder.CreateCall(F, Args);
220 Return = Builder.CreateICmpNE(
221 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
222 return Return;
225 void ParallelLoopGenerator::createCallJoinThreads() {
226 const std::string Name = "GOMP_parallel_end";
228 Function *F = M->getFunction(Name);
230 // If F is not available, declare it.
231 if (!F) {
232 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
234 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
235 F = Function::Create(Ty, Linkage, Name, M);
238 Builder.CreateCall(F, {});
241 void ParallelLoopGenerator::createCallCleanupThread() {
242 const std::string Name = "GOMP_loop_end_nowait";
244 Function *F = M->getFunction(Name);
246 // If F is not available, declare it.
247 if (!F) {
248 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
250 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
251 F = Function::Create(Ty, Linkage, Name, M);
254 Builder.CreateCall(F, {});
257 Function *ParallelLoopGenerator::createSubFnDefinition() {
258 Function *F = Builder.GetInsertBlock()->getParent();
259 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
260 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
261 Function *SubFn = Function::Create(FT, Function::InternalLinkage,
262 F->getName() + "_polly_subfn", M);
264 // Certain backends (e.g., NVPTX) do not support '.'s in function names.
265 // Hence, we ensure that all '.'s are replaced by '_'s.
266 std::string FunctionName = SubFn->getName();
267 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
268 SubFn->setName(FunctionName);
270 // Do not run any polly pass on the new function.
271 SubFn->addFnAttr(PollySkipFnAttr);
273 Function::arg_iterator AI = SubFn->arg_begin();
274 AI->setName("polly.par.userContext");
276 return SubFn;
279 AllocaInst *
280 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
281 SmallVector<Type *, 8> Members;
283 for (Value *V : Values)
284 Members.push_back(V->getType());
286 // We do not want to allocate the alloca inside any loop, thus we allocate it
287 // in the entry block of the function and use annotations to denote the actual
288 // live span (similar to clang).
289 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
290 Instruction *IP = &*EntryBB.getFirstInsertionPt();
291 StructType *Ty = StructType::get(Builder.getContext(), Members);
292 AllocaInst *Struct = new AllocaInst(Ty, nullptr, "polly.par.userContext", IP);
294 // Mark the start of the lifetime for the parameter struct.
295 ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty));
296 Builder.CreateLifetimeStart(Struct, SizeOf);
298 for (unsigned i = 0; i < Values.size(); i++) {
299 Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
300 Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
301 Builder.CreateStore(Values[i], Address);
304 return Struct;
307 void ParallelLoopGenerator::extractValuesFromStruct(
308 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
309 for (unsigned i = 0; i < OldValues.size(); i++) {
310 Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
311 Value *NewValue = Builder.CreateLoad(Address);
312 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
313 Map[OldValues[i]] = NewValue;
317 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
318 SetVector<Value *> Data,
319 ValueMapT &Map, Function **SubFnPtr) {
320 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
321 Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
322 Function *SubFn = createSubFnDefinition();
323 LLVMContext &Context = SubFn->getContext();
325 // Store the previous basic block.
326 PrevBB = Builder.GetInsertBlock();
328 // Create basic blocks.
329 HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
330 ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
331 CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
332 PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
334 DT.addNewBlock(HeaderBB, PrevBB);
335 DT.addNewBlock(ExitBB, HeaderBB);
336 DT.addNewBlock(CheckNextBB, HeaderBB);
337 DT.addNewBlock(PreHeaderBB, HeaderBB);
339 // Fill up basic block HeaderBB.
340 Builder.SetInsertPoint(HeaderBB);
341 LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
342 UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
343 UserContext = Builder.CreateBitCast(
344 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
346 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
347 Map);
348 Builder.CreateBr(CheckNextBB);
350 // Add code to check if another set of iterations will be executed.
351 Builder.SetInsertPoint(CheckNextBB);
352 Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
353 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
354 "polly.par.hasNextScheduleBlock");
355 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
357 // Add code to load the iv bounds for this set of iterations.
358 Builder.SetInsertPoint(PreHeaderBB);
359 LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
360 UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
362 // Subtract one as the upper bound provided by openmp is a < comparison
363 // whereas the codegenForSequential function creates a <= comparison.
364 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
365 "polly.par.UBAdjusted");
367 Builder.CreateBr(CheckNextBB);
368 Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
369 IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB,
370 ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
372 BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
374 // Add code to terminate this subfunction.
375 Builder.SetInsertPoint(ExitBB);
376 createCallCleanupThread();
377 Builder.CreateRetVoid();
379 Builder.SetInsertPoint(&*LoopBody);
380 *SubFnPtr = SubFn;
382 return IV;