1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains functions to create scalar and parallel loops as LLVM-IR.
12 //===----------------------------------------------------------------------===//
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/ScopDetection.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 using namespace polly
;
27 PollyNumThreads("polly-num-threads",
28 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden
,
31 // We generate a loop of either of the following structures:
36 // GuardBB PreHeaderBB
38 // __ PreHeaderBB | v \/ |
39 // / \ / | HeaderBB latch
40 // latch HeaderBB | |\ |
46 // depending on whether or not we know that it is executed at least once. If
47 // not, GuardBB checks if the loop is executed at least once. If this is the
48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
49 // contains the loop iv 'polly.indvar', the incremented loop iv
50 // 'polly.indvar_next' as well as the condition to check if we execute another
51 // iteration of the loop. After the loop has finished, we branch to ExitBB.
52 Value
*polly::createLoop(Value
*LB
, Value
*UB
, Value
*Stride
,
53 PollyIRBuilder
&Builder
, Pass
*P
, LoopInfo
&LI
,
54 DominatorTree
&DT
, BasicBlock
*&ExitBB
,
55 ICmpInst::Predicate Predicate
,
56 ScopAnnotator
*Annotator
, bool Parallel
,
58 Function
*F
= Builder
.GetInsertBlock()->getParent();
59 LLVMContext
&Context
= F
->getContext();
61 assert(LB
->getType() == UB
->getType() && "Types of loop bounds do not match");
62 IntegerType
*LoopIVType
= dyn_cast
<IntegerType
>(UB
->getType());
63 assert(LoopIVType
&& "UB is not integer?");
65 BasicBlock
*BeforeBB
= Builder
.GetInsertBlock();
67 UseGuard
? BasicBlock::Create(Context
, "polly.loop_if", F
) : nullptr;
68 BasicBlock
*HeaderBB
= BasicBlock::Create(Context
, "polly.loop_header", F
);
69 BasicBlock
*PreHeaderBB
=
70 BasicBlock::Create(Context
, "polly.loop_preheader", F
);
73 Loop
*OuterLoop
= LI
.getLoopFor(BeforeBB
);
74 Loop
*NewLoop
= new Loop();
77 OuterLoop
->addChildLoop(NewLoop
);
79 LI
.addTopLevelLoop(NewLoop
);
83 OuterLoop
->addBasicBlockToLoop(GuardBB
, LI
);
84 OuterLoop
->addBasicBlockToLoop(PreHeaderBB
, LI
);
87 NewLoop
->addBasicBlockToLoop(HeaderBB
, LI
);
89 // Notify the annotator (if present) that we have a new loop, but only
90 // after the header block is set.
92 Annotator
->pushLoop(NewLoop
, Parallel
);
95 ExitBB
= SplitBlock(BeforeBB
, &*Builder
.GetInsertPoint(), &DT
, &LI
);
96 ExitBB
->setName("polly.loop_exit");
100 BeforeBB
->getTerminator()->setSuccessor(0, GuardBB
);
101 DT
.addNewBlock(GuardBB
, BeforeBB
);
104 Builder
.SetInsertPoint(GuardBB
);
106 LoopGuard
= Builder
.CreateICmp(Predicate
, LB
, UB
);
107 LoopGuard
->setName("polly.loop_guard");
108 Builder
.CreateCondBr(LoopGuard
, PreHeaderBB
, ExitBB
);
109 DT
.addNewBlock(PreHeaderBB
, GuardBB
);
111 BeforeBB
->getTerminator()->setSuccessor(0, PreHeaderBB
);
112 DT
.addNewBlock(PreHeaderBB
, BeforeBB
);
116 Builder
.SetInsertPoint(PreHeaderBB
);
117 Builder
.CreateBr(HeaderBB
);
120 DT
.addNewBlock(HeaderBB
, PreHeaderBB
);
121 Builder
.SetInsertPoint(HeaderBB
);
122 PHINode
*IV
= Builder
.CreatePHI(LoopIVType
, 2, "polly.indvar");
123 IV
->addIncoming(LB
, PreHeaderBB
);
124 Stride
= Builder
.CreateZExtOrBitCast(Stride
, LoopIVType
);
125 Value
*IncrementedIV
= Builder
.CreateNSWAdd(IV
, Stride
, "polly.indvar_next");
126 Value
*LoopCondition
;
127 UB
= Builder
.CreateSub(UB
, Stride
, "polly.adjust_ub");
128 LoopCondition
= Builder
.CreateICmp(Predicate
, IV
, UB
);
129 LoopCondition
->setName("polly.loop_cond");
131 // Create the loop latch and annotate it as such.
132 BranchInst
*B
= Builder
.CreateCondBr(LoopCondition
, HeaderBB
, ExitBB
);
134 Annotator
->annotateLoopLatch(B
, NewLoop
, Parallel
);
136 IV
->addIncoming(IncrementedIV
, HeaderBB
);
138 DT
.changeImmediateDominator(ExitBB
, GuardBB
);
140 DT
.changeImmediateDominator(ExitBB
, HeaderBB
);
142 // The loop body should be added here.
143 Builder
.SetInsertPoint(HeaderBB
->getFirstNonPHI());
147 Value
*ParallelLoopGenerator::createParallelLoop(
148 Value
*LB
, Value
*UB
, Value
*Stride
, SetVector
<Value
*> &UsedValues
,
149 ValueMapT
&Map
, BasicBlock::iterator
*LoopBody
) {
152 AllocaInst
*Struct
= storeValuesIntoStruct(UsedValues
);
153 BasicBlock::iterator BeforeLoop
= Builder
.GetInsertPoint();
154 Value
*IV
= createSubFn(Stride
, Struct
, UsedValues
, Map
, &SubFn
);
155 *LoopBody
= Builder
.GetInsertPoint();
156 Builder
.SetInsertPoint(&*BeforeLoop
);
158 Value
*SubFnParam
= Builder
.CreateBitCast(Struct
, Builder
.getInt8PtrTy(),
159 "polly.par.userContext");
161 // Add one as the upper bound provided by openmp is a < comparison
162 // whereas the codegenForSequential function creates a <= comparison.
163 UB
= Builder
.CreateAdd(UB
, ConstantInt::get(LongType
, 1));
165 // Tell the runtime we start a parallel loop
166 createCallSpawnThreads(SubFn
, SubFnParam
, LB
, UB
, Stride
);
167 Builder
.CreateCall(SubFn
, SubFnParam
);
168 createCallJoinThreads();
170 // Mark the end of the lifetime for the parameter struct.
171 Type
*Ty
= Struct
->getType();
172 ConstantInt
*SizeOf
= Builder
.getInt64(DL
.getTypeAllocSize(Ty
));
173 Builder
.CreateLifetimeEnd(Struct
, SizeOf
);
178 void ParallelLoopGenerator::createCallSpawnThreads(Value
*SubFn
,
179 Value
*SubFnParam
, Value
*LB
,
180 Value
*UB
, Value
*Stride
) {
181 const std::string Name
= "GOMP_parallel_loop_runtime_start";
183 Function
*F
= M
->getFunction(Name
);
185 // If F is not available, declare it.
187 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
189 Type
*Params
[] = {PointerType::getUnqual(FunctionType::get(
190 Builder
.getVoidTy(), Builder
.getInt8PtrTy(), false)),
191 Builder
.getInt8PtrTy(), Builder
.getInt32Ty(), LongType
,
194 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), Params
, false);
195 F
= Function::Create(Ty
, Linkage
, Name
, M
);
198 Value
*NumberOfThreads
= Builder
.getInt32(PollyNumThreads
);
199 Value
*Args
[] = {SubFn
, SubFnParam
, NumberOfThreads
, LB
, UB
, Stride
};
201 Builder
.CreateCall(F
, Args
);
204 Value
*ParallelLoopGenerator::createCallGetWorkItem(Value
*LBPtr
,
206 const std::string Name
= "GOMP_loop_runtime_next";
208 Function
*F
= M
->getFunction(Name
);
210 // If F is not available, declare it.
212 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
213 Type
*Params
[] = {LongType
->getPointerTo(), LongType
->getPointerTo()};
214 FunctionType
*Ty
= FunctionType::get(Builder
.getInt8Ty(), Params
, false);
215 F
= Function::Create(Ty
, Linkage
, Name
, M
);
218 Value
*Args
[] = {LBPtr
, UBPtr
};
219 Value
*Return
= Builder
.CreateCall(F
, Args
);
220 Return
= Builder
.CreateICmpNE(
221 Return
, Builder
.CreateZExt(Builder
.getFalse(), Return
->getType()));
225 void ParallelLoopGenerator::createCallJoinThreads() {
226 const std::string Name
= "GOMP_parallel_end";
228 Function
*F
= M
->getFunction(Name
);
230 // If F is not available, declare it.
232 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
234 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
235 F
= Function::Create(Ty
, Linkage
, Name
, M
);
238 Builder
.CreateCall(F
, {});
241 void ParallelLoopGenerator::createCallCleanupThread() {
242 const std::string Name
= "GOMP_loop_end_nowait";
244 Function
*F
= M
->getFunction(Name
);
246 // If F is not available, declare it.
248 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
250 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), false);
251 F
= Function::Create(Ty
, Linkage
, Name
, M
);
254 Builder
.CreateCall(F
, {});
257 Function
*ParallelLoopGenerator::createSubFnDefinition() {
258 Function
*F
= Builder
.GetInsertBlock()->getParent();
259 std::vector
<Type
*> Arguments(1, Builder
.getInt8PtrTy());
260 FunctionType
*FT
= FunctionType::get(Builder
.getVoidTy(), Arguments
, false);
261 Function
*SubFn
= Function::Create(FT
, Function::InternalLinkage
,
262 F
->getName() + "_polly_subfn", M
);
264 // Certain backends (e.g., NVPTX) do not support '.'s in function names.
265 // Hence, we ensure that all '.'s are replaced by '_'s.
266 std::string FunctionName
= SubFn
->getName();
267 std::replace(FunctionName
.begin(), FunctionName
.end(), '.', '_');
268 SubFn
->setName(FunctionName
);
270 // Do not run any polly pass on the new function.
271 SubFn
->addFnAttr(PollySkipFnAttr
);
273 Function::arg_iterator AI
= SubFn
->arg_begin();
274 AI
->setName("polly.par.userContext");
280 ParallelLoopGenerator::storeValuesIntoStruct(SetVector
<Value
*> &Values
) {
281 SmallVector
<Type
*, 8> Members
;
283 for (Value
*V
: Values
)
284 Members
.push_back(V
->getType());
286 // We do not want to allocate the alloca inside any loop, thus we allocate it
287 // in the entry block of the function and use annotations to denote the actual
288 // live span (similar to clang).
289 BasicBlock
&EntryBB
= Builder
.GetInsertBlock()->getParent()->getEntryBlock();
290 Instruction
*IP
= &*EntryBB
.getFirstInsertionPt();
291 StructType
*Ty
= StructType::get(Builder
.getContext(), Members
);
292 AllocaInst
*Struct
= new AllocaInst(Ty
, nullptr, "polly.par.userContext", IP
);
294 // Mark the start of the lifetime for the parameter struct.
295 ConstantInt
*SizeOf
= Builder
.getInt64(DL
.getTypeAllocSize(Ty
));
296 Builder
.CreateLifetimeStart(Struct
, SizeOf
);
298 for (unsigned i
= 0; i
< Values
.size(); i
++) {
299 Value
*Address
= Builder
.CreateStructGEP(Ty
, Struct
, i
);
300 Address
->setName("polly.subfn.storeaddr." + Values
[i
]->getName());
301 Builder
.CreateStore(Values
[i
], Address
);
307 void ParallelLoopGenerator::extractValuesFromStruct(
308 SetVector
<Value
*> OldValues
, Type
*Ty
, Value
*Struct
, ValueMapT
&Map
) {
309 for (unsigned i
= 0; i
< OldValues
.size(); i
++) {
310 Value
*Address
= Builder
.CreateStructGEP(Ty
, Struct
, i
);
311 Value
*NewValue
= Builder
.CreateLoad(Address
);
312 NewValue
->setName("polly.subfunc.arg." + OldValues
[i
]->getName());
313 Map
[OldValues
[i
]] = NewValue
;
317 Value
*ParallelLoopGenerator::createSubFn(Value
*Stride
, AllocaInst
*StructData
,
318 SetVector
<Value
*> Data
,
319 ValueMapT
&Map
, Function
**SubFnPtr
) {
320 BasicBlock
*PrevBB
, *HeaderBB
, *ExitBB
, *CheckNextBB
, *PreHeaderBB
, *AfterBB
;
321 Value
*LBPtr
, *UBPtr
, *UserContext
, *Ret1
, *HasNextSchedule
, *LB
, *UB
, *IV
;
322 Function
*SubFn
= createSubFnDefinition();
323 LLVMContext
&Context
= SubFn
->getContext();
325 // Store the previous basic block.
326 PrevBB
= Builder
.GetInsertBlock();
328 // Create basic blocks.
329 HeaderBB
= BasicBlock::Create(Context
, "polly.par.setup", SubFn
);
330 ExitBB
= BasicBlock::Create(Context
, "polly.par.exit", SubFn
);
331 CheckNextBB
= BasicBlock::Create(Context
, "polly.par.checkNext", SubFn
);
332 PreHeaderBB
= BasicBlock::Create(Context
, "polly.par.loadIVBounds", SubFn
);
334 DT
.addNewBlock(HeaderBB
, PrevBB
);
335 DT
.addNewBlock(ExitBB
, HeaderBB
);
336 DT
.addNewBlock(CheckNextBB
, HeaderBB
);
337 DT
.addNewBlock(PreHeaderBB
, HeaderBB
);
339 // Fill up basic block HeaderBB.
340 Builder
.SetInsertPoint(HeaderBB
);
341 LBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.LBPtr");
342 UBPtr
= Builder
.CreateAlloca(LongType
, nullptr, "polly.par.UBPtr");
343 UserContext
= Builder
.CreateBitCast(
344 &*SubFn
->arg_begin(), StructData
->getType(), "polly.par.userContext");
346 extractValuesFromStruct(Data
, StructData
->getAllocatedType(), UserContext
,
348 Builder
.CreateBr(CheckNextBB
);
350 // Add code to check if another set of iterations will be executed.
351 Builder
.SetInsertPoint(CheckNextBB
);
352 Ret1
= createCallGetWorkItem(LBPtr
, UBPtr
);
353 HasNextSchedule
= Builder
.CreateTrunc(Ret1
, Builder
.getInt1Ty(),
354 "polly.par.hasNextScheduleBlock");
355 Builder
.CreateCondBr(HasNextSchedule
, PreHeaderBB
, ExitBB
);
357 // Add code to load the iv bounds for this set of iterations.
358 Builder
.SetInsertPoint(PreHeaderBB
);
359 LB
= Builder
.CreateLoad(LBPtr
, "polly.par.LB");
360 UB
= Builder
.CreateLoad(UBPtr
, "polly.par.UB");
362 // Subtract one as the upper bound provided by openmp is a < comparison
363 // whereas the codegenForSequential function creates a <= comparison.
364 UB
= Builder
.CreateSub(UB
, ConstantInt::get(LongType
, 1),
365 "polly.par.UBAdjusted");
367 Builder
.CreateBr(CheckNextBB
);
368 Builder
.SetInsertPoint(&*--Builder
.GetInsertPoint());
369 IV
= createLoop(LB
, UB
, Stride
, Builder
, P
, LI
, DT
, AfterBB
,
370 ICmpInst::ICMP_SLE
, nullptr, true, /* UseGuard */ false);
372 BasicBlock::iterator LoopBody
= Builder
.GetInsertPoint();
374 // Add code to terminate this subfunction.
375 Builder
.SetInsertPoint(ExitBB
);
376 createCallCleanupThread();
377 Builder
.CreateRetVoid();
379 Builder
.SetInsertPoint(&*LoopBody
);