//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//
16 #include "polly/CodeGen/BlockGenerators.h"
17 #include "polly/CodeGen/CodeGeneration.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/RuntimeDebugBuilder.h"
20 #include "polly/Options.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/RegionInfo.h"
27 #include "llvm/Analysis/ScalarEvolution.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include "llvm/Transforms/Utils/Local.h"
34 #include "isl/ast_build.h"
39 using namespace polly
;
static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

static cl::opt<bool> DebugPrinting(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    ScalarAllocaMapTy &ScalarMap, ScalarAllocaMapTy &PHIOpMap,
    EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
    IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
      EscapeMap(EscapeMap), GlobalMap(GlobalMap), StartBlock(StartBlock) {}
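
// Try to synthesize a new value for Old in the current statement instance via
// ScalarEvolution: take the SCEV of Old at the statement's loop level, rewrite
// the loop induction variables to the generated loop counters in LTS, and
// expand the rewritten expression at the current insert point. Returns nullptr
// if the value cannot be expressed by ScalarEvolution.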
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // Constants that do not reference any named value can always remain
  // unchanged. Handle them early to avoid expensive map lookups. We do not take
  // the fast-path for external constants which are referenced through globals
  // as these may need to be rewritten when distributing code across different
  // LLVM modules.
  if (isa<Constant>(Old) && !isa<GlobalValue>(Old))
    return Old;

  // Inline asm is like a constant to us.
  if (isa<InlineAsm>(Old))
    return Old;

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
    return New;

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return Old;

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->contains(Inst->getParent()))
      return Old;

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
}
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
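
// Compute the new address of a memory access. If a new access relation was
// provided (NewAccesses maps the access id to an isl AST expression), the
// address is built from that expression; otherwise the original pointer
// operand is copied into the new statement instance.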
Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId(), MA.getAccessValue()->getType());
}
Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }

  assert(Pointer &&
         "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}
Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId(),
                                    Access.getAccessValue()->getType());

  if (Access.isLatestValueKind() || Access.isLatestExitPHIKind())
    return getOrCreateScalarAlloca(Access.getBaseAddr());

  if (Access.isLatestPHIKind())
    return getOrCreatePHIAlloca(Access.getBaseAddr());

  llvm_unreachable("Unknown access type");
}
Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}
Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}
void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, LTS,
                                    getLoopForStmt(Stmt));

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                          ": ", ValueOperand, "\n");

  Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
}
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}
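
// Dispatch the copy of one instruction: terminators and synthesizable values
// are skipped, loads, stores and PHIs get dedicated handling, and everything
// else is cloned operand-by-operand via copyInstScalar.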
void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}
void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
        break;
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}
void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}
BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block in
  // their alloca.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);

  return CopyBB;
}
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}
Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase,
                                         ScalarAllocaMapTy &Map,
                                         const char *NameExt) {
  // If no alloca was found create one and insert it in the entry block.
  if (!Map.count(ScalarBase)) {
    auto *Ty = ScalarBase->getType();
    auto NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
    EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
    NewAddr->insertBefore(&*EntryBB->getFirstInsertionPt());
    Map[ScalarBase] = NewAddr;
  }

  auto Addr = Map[ScalarBase];

  if (auto NewAddr = GlobalMap.lookup(Addr))
    return NewAddr;

  return Addr;
}
Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isArrayKind() && "Trying to get alloca for array kind");

  if (Access.isPHIKind())
    return getOrCreatePHIAlloca(Access.getBaseAddr());
  else
    return getOrCreateScalarAlloca(Access.getBaseAddr());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  if (Array->isPHIKind())
    return getOrCreatePHIAlloca(Array->getBasePtr());
  else
    return getOrCreateScalarAlloca(Array->getBasePtr());
}

Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
}

Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
}
void BlockGenerator::handleOutsideUsers(const Scop &S, Instruction *Inst) {
  // If there are escape users we get the alloca for this instruction and put it
  // in the EscapeMap for later finalization. Lastly, if the instruction was
  // copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction user will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateScalarAlloca(Inst);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}
void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto *StmtDom = Stmt.getDomain();
    auto *AccDom = isl_map_domain(MA->getAccessRelation());
    assert(isl_set_is_subset(StmtDom, AccDom) &&
           "Scalar must be loaded in all statement instances");
    isl_set_free(StmtDom);
    isl_set_free(AccDom);
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getBaseAddr()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}
void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() && "Region statements need to use the "
                               "generateScalarStores() function in the "
                               "RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

#ifndef NDEBUG
    auto *StmtDom = Stmt.getDomain();
    auto *AccDom = isl_map_domain(MA->getAccessRelation());
    assert(isl_set_is_subset(StmtDom, AccDom) &&
           "Scalar must be stored in all statement instances");
    isl_set_free(StmtDom);
    isl_set_free(AccDom);
#endif

    Value *Val = MA->getAccessValue();
    if (MA->isAnyPHIKind()) {
      assert(MA->getIncoming().size() >= 1 &&
             "Block statements have exactly one exiting block, or multiple but "
             "with same incoming block and value");
      assert(std::all_of(MA->getIncoming().begin(), MA->getIncoming().end(),
                         [&](std::pair<BasicBlock *, Value *> p) -> bool {
                           return p.first == Stmt.getBasicBlock();
                         }) &&
             "Incoming block must be statement's block");
      Val = MA->getIncoming()[0].second;
    }
    auto Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);

    Val = getNewValue(Stmt, Val, BBMap, LTS, L);
    assert((!isa<Instruction>(Val) ||
            DT.dominates(cast<Instruction>(Val)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(Val, Address);
  }
}
void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either exit
    // PHI nodes we model as common scalars but without initialization, or
    // incoming phi nodes that need to be initialized. Check if the first is the
    // case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(),
                        getOrCreateScalarAlloca(Array->getBasePtr()));
  }
}
void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {

    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers the
    // relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Inst);
  }
}
void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value is merged between the original code's
    // value or the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName();
    Value *ScalarAddr = getOrCreateScalarAlloca(PHI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}
void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");
}
void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}
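
// The VectorBlockGenerator produces one basic block that computes a whole
// vector of statement instances at once. VLTS contains one loop-to-SCEV map
// per vector lane; its size defines the vector width.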
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}
*VectorBlockGenerator::getVectorPtrTy(const Value
*Val
, int Width
) {
742 PointerType
*PointerTy
= dyn_cast
<PointerType
>(Val
->getType());
743 assert(PointerTy
&& "PointerType expected");
745 Type
*ScalarType
= PointerTy
->getElementType();
746 VectorType
*VectorType
= VectorType::get(ScalarType
, Width
);
748 return PointerType::getUnqual(VectorType
);
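
// A stride-one (consecutive) access is loaded as a single wide load. A sketch
// of the IR this emits, assuming a 4-wide vector of double:
//
//   %vector_ptr = bitcast double* %p to <4 x double>*
//   %vec_full   = load <4 x double>, <4 x double>* %vector_ptr
//
// For a stride of -1 the loaded vector is additionally reversed with a
// shufflevector.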
Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}
void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}
void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}
void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getBaseAddr()] = VectorVal;
  }
}
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}
void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
                               "the vector block generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time) or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}
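
// The RegionGenerator handles non-affine region statements: it copies each
// basic block of the region individually and afterwards reconstructs the
// region's control flow, dominance information, and PHI nodes.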
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return BBCopyIDom;
}
// This is to determine whether an llvm::Value (defined in @p BB) is usable when
// leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
// does not work in cases where the exit block has edges from outside the
// region. In that case the llvm::Value would never be usable in the exit
// block. The RegionGenerator however creates a new exit block ('ExitBBCopy')
// for the subregion's exiting edges only. We need to determine whether an
// llvm::Value is usable in there. We do this by checking whether it dominates
// all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}
// Find the direct dominator of the subregion's exit block if the subregion was
// simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}
&Stmt
, LoopToScevMapT
<S
,
1154 isl_id_to_ast_expr
*IdToAstExp
) {
1155 assert(Stmt
.isRegionStmt() &&
1156 "Only region statements can be copied by the region generator");
1158 // Forget all old mappings.
1161 IncompletePHINodeMap
.clear();
1163 // Collection of all values related to this subregion.
1166 // The region represented by the statement.
1167 Region
*R
= Stmt
.getRegion();
1169 // Create a dedicated entry for the region where we can reload all demoted
1171 BasicBlock
*EntryBB
= R
->getEntry();
1172 BasicBlock
*EntryBBCopy
= SplitBlock(Builder
.GetInsertBlock(),
1173 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1174 EntryBBCopy
->setName("polly.stmt." + EntryBB
->getName() + ".entry");
1175 Builder
.SetInsertPoint(&EntryBBCopy
->front());
1177 ValueMapT
&EntryBBMap
= RegionMaps
[EntryBBCopy
];
1178 generateScalarLoads(Stmt
, LTS
, EntryBBMap
, IdToAstExp
);
1180 for (auto PI
= pred_begin(EntryBB
), PE
= pred_end(EntryBB
); PI
!= PE
; ++PI
)
1181 if (!R
->contains(*PI
))
1182 BlockMap
[*PI
] = EntryBBCopy
;
1184 // Iterate over all blocks in the region in a breadth-first search.
1185 std::deque
<BasicBlock
*> Blocks
;
1186 SmallSetVector
<BasicBlock
*, 8> SeenBlocks
;
1187 Blocks
.push_back(EntryBB
);
1188 SeenBlocks
.insert(EntryBB
);
1190 while (!Blocks
.empty()) {
1191 BasicBlock
*BB
= Blocks
.front();
1194 // First split the block and update dominance information.
1195 BasicBlock
*BBCopy
= splitBB(BB
);
1196 BasicBlock
*BBCopyIDom
= repairDominance(BB
, BBCopy
);
1198 // Get the mapping for this block and initialize it with either the scalar
1199 // loads from the generated entering block (which dominates all blocks of
1200 // this subregion) or the maps of the immediate dominator, if part of the
1201 // subregion. The latter necessarily includes the former.
1202 ValueMapT
*InitBBMap
;
1204 assert(RegionMaps
.count(BBCopyIDom
));
1205 InitBBMap
= &RegionMaps
[BBCopyIDom
];
1207 InitBBMap
= &EntryBBMap
;
1208 auto Inserted
= RegionMaps
.insert(std::make_pair(BBCopy
, *InitBBMap
));
1209 ValueMapT
&RegionMap
= Inserted
.first
->second
;
1211 // Copy the block with the BlockGenerator.
1212 Builder
.SetInsertPoint(&BBCopy
->front());
1213 copyBB(Stmt
, BB
, BBCopy
, RegionMap
, LTS
, IdToAstExp
);
1215 // In order to remap PHI nodes we store also basic block mappings.
1216 BlockMap
[BB
] = BBCopy
;
1218 // Add values to incomplete PHI nodes waiting for this block to be copied.
1219 for (const PHINodePairTy
&PHINodePair
: IncompletePHINodeMap
[BB
])
1220 addOperandToPHI(Stmt
, PHINodePair
.first
, PHINodePair
.second
, BB
, LTS
);
1221 IncompletePHINodeMap
[BB
].clear();
1223 // And continue with new successors inside the region.
1224 for (auto SI
= succ_begin(BB
), SE
= succ_end(BB
); SI
!= SE
; SI
++)
1225 if (R
->contains(*SI
) && SeenBlocks
.insert(*SI
))
1226 Blocks
.push_back(*SI
);
1228 // Remember value in case it is visible after this subregion.
1229 if (isDominatingSubregionExit(DT
, R
, BB
))
1230 ValueMap
.insert(RegionMap
.begin(), RegionMap
.end());
1233 // Now create a new dedicated region exit block and add it to the region map.
1234 BasicBlock
*ExitBBCopy
= SplitBlock(Builder
.GetInsertBlock(),
1235 &*Builder
.GetInsertPoint(), &DT
, &LI
);
1236 ExitBBCopy
->setName("polly.stmt." + R
->getExit()->getName() + ".exit");
1237 BlockMap
[R
->getExit()] = ExitBBCopy
;
1239 BasicBlock
*ExitDomBBCopy
= BlockMap
.lookup(findExitDominator(DT
, R
));
1240 assert(ExitDomBBCopy
&& "Common exit dominator must be within region; at "
1241 "least the entry node must match");
1242 DT
.changeImmediateDominator(ExitBBCopy
, ExitDomBBCopy
);
1244 // As the block generator doesn't handle control flow we need to add the
1245 // region control flow by hand after all blocks have been copied.
1246 for (BasicBlock
*BB
: SeenBlocks
) {
1248 BasicBlock
*BBCopy
= BlockMap
[BB
];
1249 TerminatorInst
*TI
= BB
->getTerminator();
1250 if (isa
<UnreachableInst
>(TI
)) {
1251 while (!BBCopy
->empty())
1252 BBCopy
->begin()->eraseFromParent();
1253 new UnreachableInst(BBCopy
->getContext(), BBCopy
);
1257 Instruction
*BICopy
= BBCopy
->getTerminator();
1259 ValueMapT
&RegionMap
= RegionMaps
[BBCopy
];
1260 RegionMap
.insert(BlockMap
.begin(), BlockMap
.end());
1262 Builder
.SetInsertPoint(BICopy
);
1263 copyInstScalar(Stmt
, TI
, RegionMap
, LTS
);
1264 BICopy
->eraseFromParent();
1267 // Add counting PHI nodes to all loops in the region that can be used as
1268 // replacement for SCEVs refering to the old loop.
1269 for (BasicBlock
*BB
: SeenBlocks
) {
1270 Loop
*L
= LI
.getLoopFor(BB
);
1271 if (L
== nullptr || L
->getHeader() != BB
|| !R
->contains(L
))
1274 BasicBlock
*BBCopy
= BlockMap
[BB
];
1275 Value
*NullVal
= Builder
.getInt32(0);
1277 PHINode::Create(Builder
.getInt32Ty(), 2, "polly.subregion.iv");
1278 Instruction
*LoopPHIInc
= BinaryOperator::CreateAdd(
1279 LoopPHI
, Builder
.getInt32(1), "polly.subregion.iv.inc");
1280 LoopPHI
->insertBefore(&BBCopy
->front());
1281 LoopPHIInc
->insertBefore(BBCopy
->getTerminator());
1283 for (auto *PredBB
: make_range(pred_begin(BB
), pred_end(BB
))) {
1284 if (!R
->contains(PredBB
))
1286 if (L
->contains(PredBB
))
1287 LoopPHI
->addIncoming(LoopPHIInc
, BlockMap
[PredBB
]);
1289 LoopPHI
->addIncoming(NullVal
, BlockMap
[PredBB
]);
1292 for (auto *PredBBCopy
: make_range(pred_begin(BBCopy
), pred_end(BBCopy
)))
1293 if (LoopPHI
->getBasicBlockIndex(PredBBCopy
) < 0)
1294 LoopPHI
->addIncoming(NullVal
, PredBBCopy
);
1296 LTS
[L
] = SE
.getUnknown(LoopPHI
);
1299 // Continue generating code in the exit block.
1300 Builder
.SetInsertPoint(&*ExitBBCopy
->getFirstInsertionPt());
1302 // Write values visible to other statements.
1303 generateScalarStores(Stmt
, LTS
, ValueMap
, IdToAstExp
);
1306 IncompletePHINodeMap
.clear();
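
// Build the PHI node that merges an exit PHI's incoming values in the newly
// created subregion exit block.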
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = BlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlock = BlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlock->getTerminator());
    assert(RegionMaps.count(NewIncomingBlock));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlock];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlock);
  }

  return NewPHI;
}
*RegionGenerator::getExitScalar(MemoryAccess
*MA
, LoopToScevMapT
<S
,
1350 ScopStmt
*Stmt
= MA
->getStatement();
1352 // TODO: Add some test cases that ensure this is really the right choice.
1353 Loop
*L
= LI
.getLoopFor(Stmt
->getRegion()->getExit());
1355 if (MA
->isAnyPHIKind()) {
1356 auto Incoming
= MA
->getIncoming();
1357 assert(!Incoming
.empty() &&
1358 "PHI WRITEs must have originate from at least one incoming block");
1360 // If there is only one incoming value, we do not need to create a PHI.
1361 if (Incoming
.size() == 1) {
1362 Value
*OldVal
= Incoming
[0].second
;
1363 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
1366 return buildExitPHI(MA
, LTS
, BBMap
, L
);
1369 // MK_Value accesses leaving the subregion must dominate the exit block; just
1370 // pass the copied value
1371 Value
*OldVal
= MA
->getAccessValue();
1372 return getNewValue(*Stmt
, OldVal
, BBMap
, LTS
, L
);
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    Value *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(NewVal) ||
            DT.dominates(cast<Instruction>(NewVal)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(NewVal, Address);
  }
}
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  Region *StmtR = Stmt.getRegion();

  // If the incoming block was not yet copied mark this PHI as incomplete.
  // Once the block will be copied the incoming value will be added.
  BasicBlock *BBCopy = BlockMap[IncomingBB];
  if (!BBCopy) {
    assert(StmtR->contains(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  Value *OpCopy = nullptr;
  if (StmtR->contains(IncomingBB)) {
    assert(RegionMaps.count(BBCopy) &&
           "Incoming PHI block did not have a BBMap");
    ValueMapT &BBCopyMap = RegionMaps[BBCopy];

    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHIs incoming block
    // change it, otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(BBCopy->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(&*IP);
  } else {
    if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
      return;

    Value *PHIOpAddr = getOrCreatePHIAlloca(const_cast<PHINode *>(PHI));
    OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
                          BlockMap[IncomingBB]->getTerminator());
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  assert(BBCopy && "Incoming PHI block was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopy);
}
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (unsigned u = 0; u < NumIncoming; u++)
    addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), LTS);
}