1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
14 //===----------------------------------------------------------------------===//
16 #include "polly/CodeGen/BlockGenerators.h"
17 #include "polly/CodeGen/CodeGeneration.h"
18 #include "polly/CodeGen/IslExprBuilder.h"
19 #include "polly/CodeGen/RuntimeDebugBuilder.h"
20 #include "polly/Options.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "polly/Support/VirtualInstruction.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/RegionInfo.h"
28 #include "llvm/Analysis/ScalarEvolution.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
32 #include "llvm/Transforms/Utils/Local.h"
35 #include "isl/ast_build.h"
40 using namespace polly
;
42 static cl::opt
<bool> Aligned("enable-polly-aligned",
43 cl::desc("Assumed aligned memory accesses."),
44 cl::Hidden
, cl::init(false), cl::ZeroOrMore
,
45 cl::cat(PollyCategory
));
47 bool PollyDebugPrinting
;
48 static cl::opt
<bool, true> DebugPrintingX(
49 "polly-codegen-add-debug-printing",
50 cl::desc("Add printf calls that show the values loaded/stored."),
51 cl::location(PollyDebugPrinting
), cl::Hidden
, cl::init(false),
52 cl::ZeroOrMore
, cl::cat(PollyCategory
));
54 BlockGenerator::BlockGenerator(
55 PollyIRBuilder
&B
, LoopInfo
&LI
, ScalarEvolution
&SE
, DominatorTree
&DT
,
56 AllocaMapTy
&ScalarMap
, EscapeUsersAllocaMapTy
&EscapeMap
,
57 ValueMapT
&GlobalMap
, IslExprBuilder
*ExprBuilder
, BasicBlock
*StartBlock
)
58 : Builder(B
), LI(LI
), SE(SE
), ExprBuilder(ExprBuilder
), DT(DT
),
59 EntryBB(nullptr), ScalarMap(ScalarMap
), EscapeMap(EscapeMap
),
60 GlobalMap(GlobalMap
), StartBlock(StartBlock
) {}
62 Value
*BlockGenerator::trySynthesizeNewValue(ScopStmt
&Stmt
, Value
*Old
,
66 if (!SE
.isSCEVable(Old
->getType()))
69 const SCEV
*Scev
= SE
.getSCEVAtScope(Old
, L
);
73 if (isa
<SCEVCouldNotCompute
>(Scev
))
76 const SCEV
*NewScev
= SCEVLoopAddRecRewriter::rewrite(Scev
, LTS
, SE
);
78 VTV
.insert(BBMap
.begin(), BBMap
.end());
79 VTV
.insert(GlobalMap
.begin(), GlobalMap
.end());
81 Scop
&S
= *Stmt
.getParent();
82 const DataLayout
&DL
= S
.getFunction().getParent()->getDataLayout();
83 auto IP
= Builder
.GetInsertPoint();
85 assert(IP
!= Builder
.GetInsertBlock()->end() &&
86 "Only instructions can be insert points for SCEVExpander");
88 expandCodeFor(S
, SE
, DL
, "polly", NewScev
, Old
->getType(), &*IP
, &VTV
,
89 StartBlock
->getSinglePredecessor());
91 BBMap
[Old
] = Expanded
;
95 Value
*BlockGenerator::getNewValue(ScopStmt
&Stmt
, Value
*Old
, ValueMapT
&BBMap
,
96 LoopToScevMapT
<S
, Loop
*L
) const {
98 auto lookupGlobally
= [this](Value
*Old
) -> Value
* {
99 Value
*New
= GlobalMap
.lookup(Old
);
104 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
105 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
106 // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
107 // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
108 // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
109 // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
110 // GlobalMap should be a mapping from (value in original SCoP) to (copied
111 // value in generated SCoP), without intermediate mappings, which might
112 // easily require transitiveness as well.
113 if (Value
*NewRemapped
= GlobalMap
.lookup(New
))
116 // No test case for this code.
117 if (Old
->getType()->getScalarSizeInBits() <
118 New
->getType()->getScalarSizeInBits())
119 New
= Builder
.CreateTruncOrBitCast(New
, Old
->getType());
124 Value
*New
= nullptr;
125 auto VUse
= VirtualUse::create(&Stmt
, L
, Old
, true);
126 switch (VUse
.getKind()) {
127 case VirtualUse::Block
:
128 // BasicBlock are constants, but the BlockGenerator copies them.
129 New
= BBMap
.lookup(Old
);
132 case VirtualUse::Constant
:
134 // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
135 // Constants should not be redefined. In this case, the GlobalMap just
136 // contains a mapping to the same constant, which is unnecessary, but
138 if ((New
= lookupGlobally(Old
)))
141 assert(!BBMap
.count(Old
));
145 case VirtualUse::ReadOnly
:
146 assert(!GlobalMap
.count(Old
));
149 // * Isl/CodeGen/MemAccess/create_arrays.ll
150 // * Isl/CodeGen/read-only-scalars.ll
151 // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
152 // For some reason these reload a read-only value. The reloaded value ends
153 // up in BBMap, buts its value should be identical.
156 // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
157 // The parallel subfunctions need to reference the read-only value from the
158 // parent function, this is done by reloading them locally.
159 if ((New
= BBMap
.lookup(Old
)))
165 case VirtualUse::Synthesizable
:
167 // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
168 // * Isl/CodeGen/OpenMP/recomputed-srem.ll
169 // * Isl/CodeGen/OpenMP/reference-other-bb.ll
170 // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
171 // For some reason synthesizable values end up in GlobalMap. Their values
172 // are the same as trySynthesizeNewValue would return. The legacy
173 // implementation prioritized GlobalMap, so this is what we do here as well.
174 // Ideally, synthesizable values should not end up in GlobalMap.
175 if ((New
= lookupGlobally(Old
)))
179 // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
180 // * Isl/CodeGen/getNumberOfIterations.ll
181 // * Isl/CodeGen/non_affine_float_compare.ll
182 // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
183 // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
184 // not precomputed (SCEVExpander has its own caching mechanism).
185 // These tests fail without this, but I think trySynthesizeNewValue would
186 // just re-synthesize the same instructions.
187 if ((New
= BBMap
.lookup(Old
)))
190 New
= trySynthesizeNewValue(Stmt
, Old
, BBMap
, LTS
, L
);
193 case VirtualUse::Hoisted
:
194 // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
195 // redefined locally (which will be ignored anyway). That is, the following
196 // assertion should apply: assert(!BBMap.count(Old))
198 New
= lookupGlobally(Old
);
201 case VirtualUse::Intra
:
202 case VirtualUse::Inter
:
203 assert(!GlobalMap
.count(Old
) &&
204 "Intra and inter-stmt values are never global");
205 New
= BBMap
.lookup(Old
);
208 assert(New
&& "Unexpected scalar dependence in region!");
212 void BlockGenerator::copyInstScalar(ScopStmt
&Stmt
, Instruction
*Inst
,
213 ValueMapT
&BBMap
, LoopToScevMapT
<S
) {
214 // We do not generate debug intrinsics as we did not investigate how to
215 // copy them correctly. At the current state, they just crash the code
216 // generation as the meta-data operands are not correctly copied.
217 if (isa
<DbgInfoIntrinsic
>(Inst
))
220 Instruction
*NewInst
= Inst
->clone();
222 // Replace old operands with the new ones.
223 for (Value
*OldOperand
: Inst
->operands()) {
225 getNewValue(Stmt
, OldOperand
, BBMap
, LTS
, getLoopForStmt(Stmt
));
228 assert(!isa
<StoreInst
>(NewInst
) &&
229 "Store instructions are always needed!");
230 NewInst
->deleteValue();
234 NewInst
->replaceUsesOfWith(OldOperand
, NewOperand
);
238 Builder
.Insert(NewInst
);
239 BBMap
[Inst
] = NewInst
;
241 // When copying the instruction onto the Module meant for the GPU,
242 // debug metadata attached to an instruction causes all related
243 // metadata to be pulled into the Module. This includes the DICompileUnit,
244 // which will not be listed in llvm.dbg.cu of the Module since the Module
245 // doesn't contain one. This fails the verification of the Module and the
246 // subsequent generation of the ASM string.
247 if( NewInst
->getModule() != Inst
->getModule() )
248 NewInst
->setDebugLoc(llvm::DebugLoc());
250 if (!NewInst
->getType()->isVoidTy())
251 NewInst
->setName("p_" + Inst
->getName());
255 BlockGenerator::generateLocationAccessed(ScopStmt
&Stmt
, MemAccInst Inst
,
256 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
257 isl_id_to_ast_expr
*NewAccesses
) {
258 const MemoryAccess
&MA
= Stmt
.getArrayAccessFor(Inst
);
259 return generateLocationAccessed(
260 Stmt
, getLoopForStmt(Stmt
),
261 Inst
.isNull() ? nullptr : Inst
.getPointerOperand(), BBMap
, LTS
,
262 NewAccesses
, MA
.getId().release(), MA
.getAccessValue()->getType());
265 Value
*BlockGenerator::generateLocationAccessed(
266 ScopStmt
&Stmt
, Loop
*L
, Value
*Pointer
, ValueMapT
&BBMap
,
267 LoopToScevMapT
<S
, isl_id_to_ast_expr
*NewAccesses
, __isl_take isl_id
*Id
,
268 Type
*ExpectedType
) {
269 isl_ast_expr
*AccessExpr
= isl_id_to_ast_expr_get(NewAccesses
, Id
);
272 AccessExpr
= isl_ast_expr_address_of(AccessExpr
);
273 auto Address
= ExprBuilder
->create(AccessExpr
);
275 // Cast the address of this memory access to a pointer type that has the
276 // same element type as the original access, but uses the address space of
277 // the newly generated pointer.
278 auto OldPtrTy
= ExpectedType
->getPointerTo();
279 auto NewPtrTy
= Address
->getType();
280 OldPtrTy
= PointerType::get(OldPtrTy
->getElementType(),
281 NewPtrTy
->getPointerAddressSpace());
283 if (OldPtrTy
!= NewPtrTy
)
284 Address
= Builder
.CreateBitOrPointerCast(Address
, OldPtrTy
);
289 "If expression was not generated, must use the original pointer value");
290 return getNewValue(Stmt
, Pointer
, BBMap
, LTS
, L
);
294 BlockGenerator::getImplicitAddress(MemoryAccess
&Access
, Loop
*L
,
295 LoopToScevMapT
<S
, ValueMapT
&BBMap
,
296 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
297 if (Access
.isLatestArrayKind())
298 return generateLocationAccessed(*Access
.getStatement(), L
, nullptr, BBMap
,
299 LTS
, NewAccesses
, Access
.getId().release(),
300 Access
.getAccessValue()->getType());
302 return getOrCreateAlloca(Access
);
305 Loop
*BlockGenerator::getLoopForStmt(const ScopStmt
&Stmt
) const {
306 auto *StmtBB
= Stmt
.getEntryBlock();
307 return LI
.getLoopFor(StmtBB
);
310 Value
*BlockGenerator::generateArrayLoad(ScopStmt
&Stmt
, LoadInst
*Load
,
311 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
312 isl_id_to_ast_expr
*NewAccesses
) {
313 if (Value
*PreloadLoad
= GlobalMap
.lookup(Load
))
317 generateLocationAccessed(Stmt
, Load
, BBMap
, LTS
, NewAccesses
);
318 Value
*ScalarLoad
= Builder
.CreateAlignedLoad(
319 NewPointer
, Load
->getAlignment(), Load
->getName() + "_p_scalar_");
321 if (PollyDebugPrinting
)
322 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Load from ", NewPointer
,
323 ": ", ScalarLoad
, "\n");
328 void BlockGenerator::generateArrayStore(ScopStmt
&Stmt
, StoreInst
*Store
,
329 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
330 isl_id_to_ast_expr
*NewAccesses
) {
331 MemoryAccess
&MA
= Stmt
.getArrayAccessFor(Store
);
332 isl::set AccDom
= MA
.getAccessRelation().domain();
333 std::string Subject
= MA
.getId().get_name();
335 generateConditionalExecution(Stmt
, AccDom
, Subject
.c_str(), [&, this]() {
337 generateLocationAccessed(Stmt
, Store
, BBMap
, LTS
, NewAccesses
);
338 Value
*ValueOperand
= getNewValue(Stmt
, Store
->getValueOperand(), BBMap
,
339 LTS
, getLoopForStmt(Stmt
));
341 if (PollyDebugPrinting
)
342 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Store to ", NewPointer
,
343 ": ", ValueOperand
, "\n");
345 Builder
.CreateAlignedStore(ValueOperand
, NewPointer
, Store
->getAlignment());
349 bool BlockGenerator::canSyntheziseInStmt(ScopStmt
&Stmt
, Instruction
*Inst
) {
350 Loop
*L
= getLoopForStmt(Stmt
);
351 return (Stmt
.isBlockStmt() || !Stmt
.getRegion()->contains(L
)) &&
352 canSynthesize(Inst
, *Stmt
.getParent(), &SE
, L
);
355 void BlockGenerator::copyInstruction(ScopStmt
&Stmt
, Instruction
*Inst
,
356 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
357 isl_id_to_ast_expr
*NewAccesses
) {
358 // Terminator instructions control the control flow. They are explicitly
359 // expressed in the clast and do not need to be copied.
360 if (Inst
->isTerminator())
363 // Synthesizable statements will be generated on-demand.
364 if (canSyntheziseInStmt(Stmt
, Inst
))
367 if (auto *Load
= dyn_cast
<LoadInst
>(Inst
)) {
368 Value
*NewLoad
= generateArrayLoad(Stmt
, Load
, BBMap
, LTS
, NewAccesses
);
369 // Compute NewLoad before its insertion in BBMap to make the insertion
371 BBMap
[Load
] = NewLoad
;
375 if (auto *Store
= dyn_cast
<StoreInst
>(Inst
)) {
376 // Identified as redundant by -polly-simplify.
377 if (!Stmt
.getArrayAccessOrNULLFor(Store
))
380 generateArrayStore(Stmt
, Store
, BBMap
, LTS
, NewAccesses
);
384 if (auto *PHI
= dyn_cast
<PHINode
>(Inst
)) {
385 copyPHIInstruction(Stmt
, PHI
, BBMap
, LTS
);
389 // Skip some special intrinsics for which we do not adjust the semantics to
390 // the new schedule. All others are handled like every other instruction.
391 if (isIgnoredIntrinsic(Inst
))
394 copyInstScalar(Stmt
, Inst
, BBMap
, LTS
);
397 void BlockGenerator::removeDeadInstructions(BasicBlock
*BB
, ValueMapT
&BBMap
) {
398 auto NewBB
= Builder
.GetInsertBlock();
399 for (auto I
= NewBB
->rbegin(); I
!= NewBB
->rend(); I
++) {
400 Instruction
*NewInst
= &*I
;
402 if (!isInstructionTriviallyDead(NewInst
))
405 for (auto Pair
: BBMap
)
406 if (Pair
.second
== NewInst
) {
407 BBMap
.erase(Pair
.first
);
410 NewInst
->eraseFromParent();
415 void BlockGenerator::copyStmt(ScopStmt
&Stmt
, LoopToScevMapT
<S
,
416 isl_id_to_ast_expr
*NewAccesses
) {
417 assert(Stmt
.isBlockStmt() &&
418 "Only block statements can be copied by the block generator");
422 BasicBlock
*BB
= Stmt
.getBasicBlock();
423 copyBB(Stmt
, BB
, BBMap
, LTS
, NewAccesses
);
424 removeDeadInstructions(BB
, BBMap
);
427 BasicBlock
*BlockGenerator::splitBB(BasicBlock
*BB
) {
428 BasicBlock
*CopyBB
= SplitBlock(Builder
.GetInsertBlock(),
429 &*Builder
.GetInsertPoint(), &DT
, &LI
);
430 CopyBB
->setName("polly.stmt." + BB
->getName());
434 BasicBlock
*BlockGenerator::copyBB(ScopStmt
&Stmt
, BasicBlock
*BB
,
435 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
436 isl_id_to_ast_expr
*NewAccesses
) {
437 BasicBlock
*CopyBB
= splitBB(BB
);
438 Builder
.SetInsertPoint(&CopyBB
->front());
439 generateScalarLoads(Stmt
, LTS
, BBMap
, NewAccesses
);
441 copyBB(Stmt
, BB
, CopyBB
, BBMap
, LTS
, NewAccesses
);
443 // After a basic block was copied store all scalars that escape this block in
445 generateScalarStores(Stmt
, LTS
, BBMap
, NewAccesses
);
449 void BlockGenerator::copyBB(ScopStmt
&Stmt
, BasicBlock
*BB
, BasicBlock
*CopyBB
,
450 ValueMapT
&BBMap
, LoopToScevMapT
<S
,
451 isl_id_to_ast_expr
*NewAccesses
) {
452 EntryBB
= &CopyBB
->getParent()->getEntryBlock();
454 if (Stmt
.isBlockStmt())
455 for (Instruction
*Inst
: Stmt
.getInstructions())
456 copyInstruction(Stmt
, Inst
, BBMap
, LTS
, NewAccesses
);
458 for (Instruction
&Inst
: *BB
)
459 copyInstruction(Stmt
, &Inst
, BBMap
, LTS
, NewAccesses
);
462 Value
*BlockGenerator::getOrCreateAlloca(const MemoryAccess
&Access
) {
463 assert(!Access
.isLatestArrayKind() && "Trying to get alloca for array kind");
465 return getOrCreateAlloca(Access
.getLatestScopArrayInfo());
468 Value
*BlockGenerator::getOrCreateAlloca(const ScopArrayInfo
*Array
) {
469 assert(!Array
->isArrayKind() && "Trying to get alloca for array kind");
471 auto &Addr
= ScalarMap
[Array
];
474 // Allow allocas to be (temporarily) redirected once by adding a new
475 // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
476 // is used for example by the OpenMP code generation where a first use
477 // of a scalar while still in the host code allocates a normal alloca with
478 // getOrCreateAlloca. When the values of this scalar are accessed during
479 // the generation of the parallel subfunction, these values are copied over
480 // to the parallel subfunction and each request for a scalar alloca slot
481 // must be forwarded to the temporary in-subfunction slot. This mapping is
482 // removed when the subfunction has been generated and again normal host
483 // code is generated. Due to the following reasons it is not possible to
484 // perform the GlobalMap lookup right after creating the alloca below, but
485 // instead we need to check GlobalMap at each call to getOrCreateAlloca:
487 // 1) GlobalMap may be changed multiple times (for each parallel loop),
488 // 2) The temporary mapping is commonly only known after the initial
489 // alloca has already been generated, and
490 // 3) The original alloca value must be restored after leaving the
492 if (Value
*NewAddr
= GlobalMap
.lookup(&*Addr
))
497 Type
*Ty
= Array
->getElementType();
498 Value
*ScalarBase
= Array
->getBasePtr();
500 if (Array
->isPHIKind())
505 const DataLayout
&DL
= Builder
.GetInsertBlock()->getModule()->getDataLayout();
507 Addr
= new AllocaInst(Ty
, DL
.getAllocaAddrSpace(),
508 ScalarBase
->getName() + NameExt
);
509 EntryBB
= &Builder
.GetInsertBlock()->getParent()->getEntryBlock();
510 Addr
->insertBefore(&*EntryBB
->getFirstInsertionPt());
515 void BlockGenerator::handleOutsideUsers(const Scop
&S
, ScopArrayInfo
*Array
) {
516 Instruction
*Inst
= cast
<Instruction
>(Array
->getBasePtr());
518 // If there are escape users we get the alloca for this instruction and put it
519 // in the EscapeMap for later finalization. Lastly, if the instruction was
520 // copied multiple times we already did this and can exit.
521 if (EscapeMap
.count(Inst
))
524 EscapeUserVectorTy EscapeUsers
;
525 for (User
*U
: Inst
->users()) {
527 // Non-instruction user will never escape.
528 Instruction
*UI
= dyn_cast
<Instruction
>(U
);
535 EscapeUsers
.push_back(UI
);
538 // Exit if no escape uses were found.
539 if (EscapeUsers
.empty())
542 // Get or create an escape alloca for this instruction.
543 auto *ScalarAddr
= getOrCreateAlloca(Array
);
545 // Remember that this instruction has escape uses and the escape alloca.
546 EscapeMap
[Inst
] = std::make_pair(ScalarAddr
, std::move(EscapeUsers
));
549 void BlockGenerator::generateScalarLoads(
550 ScopStmt
&Stmt
, LoopToScevMapT
<S
, ValueMapT
&BBMap
,
551 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
552 for (MemoryAccess
*MA
: Stmt
) {
553 if (MA
->isOriginalArrayKind() || MA
->isWrite())
557 auto *StmtDom
= Stmt
.getDomain();
558 auto *AccDom
= isl_map_domain(MA
->getAccessRelation().release());
559 assert(isl_set_is_subset(StmtDom
, AccDom
) &&
560 "Scalar must be loaded in all statement instances");
561 isl_set_free(StmtDom
);
562 isl_set_free(AccDom
);
566 getImplicitAddress(*MA
, getLoopForStmt(Stmt
), LTS
, BBMap
, NewAccesses
);
567 assert((!isa
<Instruction
>(Address
) ||
568 DT
.dominates(cast
<Instruction
>(Address
)->getParent(),
569 Builder
.GetInsertBlock())) &&
570 "Domination violation");
571 BBMap
[MA
->getAccessValue()] =
572 Builder
.CreateLoad(Address
, Address
->getName() + ".reload");
576 Value
*BlockGenerator::buildContainsCondition(ScopStmt
&Stmt
,
577 const isl::set
&Subdomain
) {
578 isl::ast_build AstBuild
= give(isl_ast_build_copy(Stmt
.getAstBuild()));
579 isl::set Domain
= give(Stmt
.getDomain());
581 isl::union_map USchedule
= AstBuild
.get_schedule();
582 USchedule
= USchedule
.intersect_domain(Domain
);
584 assert(!USchedule
.is_empty());
585 isl::map Schedule
= isl::map::from_union_map(USchedule
);
587 isl::set ScheduledDomain
= Schedule
.range();
588 isl::set ScheduledSet
= Subdomain
.apply(Schedule
);
590 isl::ast_build RestrictedBuild
= AstBuild
.restrict(ScheduledDomain
);
592 isl::ast_expr IsInSet
= RestrictedBuild
.expr_from(ScheduledSet
);
593 Value
*IsInSetExpr
= ExprBuilder
->create(IsInSet
.copy());
594 IsInSetExpr
= Builder
.CreateICmpNE(
595 IsInSetExpr
, ConstantInt::get(IsInSetExpr
->getType(), 0));
600 void BlockGenerator::generateConditionalExecution(
601 ScopStmt
&Stmt
, const isl::set
&Subdomain
, StringRef Subject
,
602 const std::function
<void()> &GenThenFunc
) {
603 isl::set StmtDom
= give(Stmt
.getDomain());
605 // Don't call GenThenFunc if it is never executed. An ast index expression
606 // might not be defined in this case.
607 if (Subdomain
.is_empty())
610 // If the condition is a tautology, don't generate a condition around the
612 bool IsPartialWrite
=
613 !StmtDom
.intersect_params(give(Stmt
.getParent()->getContext()))
614 .is_subset(Subdomain
);
615 if (!IsPartialWrite
) {
620 // Generate the condition.
621 Value
*Cond
= buildContainsCondition(Stmt
, Subdomain
);
622 BasicBlock
*HeadBlock
= Builder
.GetInsertBlock();
623 StringRef BlockName
= HeadBlock
->getName();
625 // Generate the conditional block.
626 SplitBlockAndInsertIfThen(Cond
, &*Builder
.GetInsertPoint(), false, nullptr,
628 BranchInst
*Branch
= cast
<BranchInst
>(HeadBlock
->getTerminator());
629 BasicBlock
*ThenBlock
= Branch
->getSuccessor(0);
630 BasicBlock
*TailBlock
= Branch
->getSuccessor(1);
632 // Assign descriptive names.
633 if (auto *CondInst
= dyn_cast
<Instruction
>(Cond
))
634 CondInst
->setName("polly." + Subject
+ ".cond");
635 ThenBlock
->setName(BlockName
+ "." + Subject
+ ".partial");
636 TailBlock
->setName(BlockName
+ ".cont");
638 // Put the client code into the conditional block and continue in the merge
640 Builder
.SetInsertPoint(ThenBlock
, ThenBlock
->getFirstInsertionPt());
642 Builder
.SetInsertPoint(TailBlock
, TailBlock
->getFirstInsertionPt());
645 void BlockGenerator::generateScalarStores(
646 ScopStmt
&Stmt
, LoopToScevMapT
<S
, ValueMapT
&BBMap
,
647 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
648 Loop
*L
= LI
.getLoopFor(Stmt
.getBasicBlock());
650 assert(Stmt
.isBlockStmt() &&
651 "Region statements need to use the generateScalarStores() function in "
652 "the RegionGenerator");
654 for (MemoryAccess
*MA
: Stmt
) {
655 if (MA
->isOriginalArrayKind() || MA
->isRead())
658 isl::set AccDom
= MA
->getAccessRelation().domain();
659 std::string Subject
= MA
->getId().get_name();
661 generateConditionalExecution(
662 Stmt
, AccDom
, Subject
.c_str(), [&, this, MA
]() {
663 Value
*Val
= MA
->getAccessValue();
664 if (MA
->isAnyPHIKind()) {
665 assert(MA
->getIncoming().size() >= 1 &&
666 "Block statements have exactly one exiting block, or "
668 "with same incoming block and value");
669 assert(std::all_of(MA
->getIncoming().begin(),
670 MA
->getIncoming().end(),
671 [&](std::pair
<BasicBlock
*, Value
*> p
) -> bool {
672 return p
.first
== Stmt
.getBasicBlock();
674 "Incoming block must be statement's block");
675 Val
= MA
->getIncoming()[0].second
;
677 auto Address
= getImplicitAddress(*MA
, getLoopForStmt(Stmt
), LTS
,
680 Val
= getNewValue(Stmt
, Val
, BBMap
, LTS
, L
);
681 assert((!isa
<Instruction
>(Val
) ||
682 DT
.dominates(cast
<Instruction
>(Val
)->getParent(),
683 Builder
.GetInsertBlock())) &&
684 "Domination violation");
685 assert((!isa
<Instruction
>(Address
) ||
686 DT
.dominates(cast
<Instruction
>(Address
)->getParent(),
687 Builder
.GetInsertBlock())) &&
688 "Domination violation");
689 Builder
.CreateStore(Val
, Address
);
695 void BlockGenerator::createScalarInitialization(Scop
&S
) {
696 BasicBlock
*ExitBB
= S
.getExit();
697 BasicBlock
*PreEntryBB
= S
.getEnteringBlock();
699 Builder
.SetInsertPoint(&*StartBlock
->begin());
701 for (auto &Array
: S
.arrays()) {
702 if (Array
->getNumberOfDimensions() != 0)
704 if (Array
->isPHIKind()) {
705 // For PHI nodes, the only values we need to store are the ones that
706 // reach the PHI node from outside the region. In general there should
707 // only be one such incoming edge and this edge should enter through
709 auto PHI
= cast
<PHINode
>(Array
->getBasePtr());
711 for (auto BI
= PHI
->block_begin(), BE
= PHI
->block_end(); BI
!= BE
; BI
++)
712 if (!S
.contains(*BI
) && *BI
!= PreEntryBB
)
713 llvm_unreachable("Incoming edges from outside the scop should always "
714 "come from PreEntryBB");
716 int Idx
= PHI
->getBasicBlockIndex(PreEntryBB
);
720 Value
*ScalarValue
= PHI
->getIncomingValue(Idx
);
722 Builder
.CreateStore(ScalarValue
, getOrCreateAlloca(Array
));
726 auto *Inst
= dyn_cast
<Instruction
>(Array
->getBasePtr());
728 if (Inst
&& S
.contains(Inst
))
731 // PHI nodes that are not marked as such in their SAI object are either exit
732 // PHI nodes we model as common scalars but without initialization, or
733 // incoming phi nodes that need to be initialized. Check if the first is the
734 // case for Inst and do not create and initialize memory if so.
735 if (auto *PHI
= dyn_cast_or_null
<PHINode
>(Inst
))
736 if (!S
.hasSingleExitEdge() && PHI
->getBasicBlockIndex(ExitBB
) >= 0)
739 Builder
.CreateStore(Array
->getBasePtr(), getOrCreateAlloca(Array
));
743 void BlockGenerator::createScalarFinalization(Scop
&S
) {
744 // The exit block of the __unoptimized__ region.
745 BasicBlock
*ExitBB
= S
.getExitingBlock();
746 // The merge block __just after__ the region and the optimized region.
747 BasicBlock
*MergeBB
= S
.getExit();
749 // The exit block of the __optimized__ region.
750 BasicBlock
*OptExitBB
= *(pred_begin(MergeBB
));
751 if (OptExitBB
== ExitBB
)
752 OptExitBB
= *(++pred_begin(MergeBB
));
754 Builder
.SetInsertPoint(OptExitBB
->getTerminator());
755 for (const auto &EscapeMapping
: EscapeMap
) {
756 // Extract the escaping instruction and the escaping users as well as the
757 // alloca the instruction was demoted to.
758 Instruction
*EscapeInst
= EscapeMapping
.first
;
759 const auto &EscapeMappingValue
= EscapeMapping
.second
;
760 const EscapeUserVectorTy
&EscapeUsers
= EscapeMappingValue
.second
;
761 Value
*ScalarAddr
= EscapeMappingValue
.first
;
763 // Reload the demoted instruction in the optimized version of the SCoP.
764 Value
*EscapeInstReload
=
765 Builder
.CreateLoad(ScalarAddr
, EscapeInst
->getName() + ".final_reload");
767 Builder
.CreateBitOrPointerCast(EscapeInstReload
, EscapeInst
->getType());
769 // Create the merge PHI that merges the optimized and unoptimized version.
770 PHINode
*MergePHI
= PHINode::Create(EscapeInst
->getType(), 2,
771 EscapeInst
->getName() + ".merge");
772 MergePHI
->insertBefore(&*MergeBB
->getFirstInsertionPt());
774 // Add the respective values to the merge PHI.
775 MergePHI
->addIncoming(EscapeInstReload
, OptExitBB
);
776 MergePHI
->addIncoming(EscapeInst
, ExitBB
);
778 // The information of scalar evolution about the escaping instruction needs
779 // to be revoked so the new merged instruction will be used.
780 if (SE
.isSCEVable(EscapeInst
->getType()))
781 SE
.forgetValue(EscapeInst
);
783 // Replace all uses of the demoted instruction with the merge PHI.
784 for (Instruction
*EUser
: EscapeUsers
)
785 EUser
->replaceUsesOfWith(EscapeInst
, MergePHI
);
789 void BlockGenerator::findOutsideUsers(Scop
&S
) {
790 for (auto &Array
: S
.arrays()) {
792 if (Array
->getNumberOfDimensions() != 0)
795 if (Array
->isPHIKind())
798 auto *Inst
= dyn_cast
<Instruction
>(Array
->getBasePtr());
803 // Scop invariant hoisting moves some of the base pointers out of the scop.
804 // We can ignore these, as the invariant load hoisting already registers the
805 // relevant outside users.
806 if (!S
.contains(Inst
))
809 handleOutsideUsers(S
, Array
);
813 void BlockGenerator::createExitPHINodeMerges(Scop
&S
) {
814 if (S
.hasSingleExitEdge())
817 auto *ExitBB
= S
.getExitingBlock();
818 auto *MergeBB
= S
.getExit();
819 auto *AfterMergeBB
= MergeBB
->getSingleSuccessor();
820 BasicBlock
*OptExitBB
= *(pred_begin(MergeBB
));
821 if (OptExitBB
== ExitBB
)
822 OptExitBB
= *(++pred_begin(MergeBB
));
824 Builder
.SetInsertPoint(OptExitBB
->getTerminator());
826 for (auto &SAI
: S
.arrays()) {
827 auto *Val
= SAI
->getBasePtr();
829 // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
830 // the original PHI's value or the reloaded incoming values from the
831 // generated code. An llvm::Value is merged between the original code's
832 // value or the generated one.
833 if (!SAI
->isExitPHIKind())
836 PHINode
*PHI
= dyn_cast
<PHINode
>(Val
);
840 if (PHI
->getParent() != AfterMergeBB
)
843 std::string Name
= PHI
->getName();
844 Value
*ScalarAddr
= getOrCreateAlloca(SAI
);
845 Value
*Reload
= Builder
.CreateLoad(ScalarAddr
, Name
+ ".ph.final_reload");
846 Reload
= Builder
.CreateBitOrPointerCast(Reload
, PHI
->getType());
847 Value
*OriginalValue
= PHI
->getIncomingValueForBlock(MergeBB
);
848 assert((!isa
<Instruction
>(OriginalValue
) ||
849 cast
<Instruction
>(OriginalValue
)->getParent() != MergeBB
) &&
850 "Original value must no be one we just generated.");
851 auto *MergePHI
= PHINode::Create(PHI
->getType(), 2, Name
+ ".ph.merge");
852 MergePHI
->insertBefore(&*MergeBB
->getFirstInsertionPt());
853 MergePHI
->addIncoming(Reload
, OptExitBB
);
854 MergePHI
->addIncoming(OriginalValue
, ExitBB
);
855 int Idx
= PHI
->getBasicBlockIndex(MergeBB
);
856 PHI
->setIncomingValue(Idx
, MergePHI
);
860 void BlockGenerator::invalidateScalarEvolution(Scop
&S
) {
862 if (Stmt
.isCopyStmt())
864 else if (Stmt
.isBlockStmt())
865 for (auto &Inst
: *Stmt
.getBasicBlock())
866 SE
.forgetValue(&Inst
);
867 else if (Stmt
.isRegionStmt())
868 for (auto *BB
: Stmt
.getRegion()->blocks())
869 for (auto &Inst
: *BB
)
870 SE
.forgetValue(&Inst
);
872 llvm_unreachable("Unexpected statement type found");
874 // Invalidate SCEV of loops surrounding the EscapeUsers.
875 for (const auto &EscapeMapping
: EscapeMap
) {
876 const EscapeUserVectorTy
&EscapeUsers
= EscapeMapping
.second
.second
;
877 for (Instruction
*EUser
: EscapeUsers
) {
878 if (Loop
*L
= LI
.getLoopFor(EUser
->getParent()))
881 L
= L
->getParentLoop();
887 void BlockGenerator::finalizeSCoP(Scop
&S
) {
889 createScalarInitialization(S
);
890 createExitPHINodeMerges(S
);
891 createScalarFinalization(S
);
892 invalidateScalarEvolution(S
);
895 VectorBlockGenerator::VectorBlockGenerator(BlockGenerator
&BlockGen
,
896 std::vector
<LoopToScevMapT
> &VLTS
,
898 : BlockGenerator(BlockGen
), VLTS(VLTS
), Schedule(Schedule
) {
899 assert(Schedule
&& "No statement domain provided");
902 Value
*VectorBlockGenerator::getVectorValue(ScopStmt
&Stmt
, Value
*Old
,
903 ValueMapT
&VectorMap
,
904 VectorValueMapT
&ScalarMaps
,
906 if (Value
*NewValue
= VectorMap
.lookup(Old
))
909 int Width
= getVectorWidth();
911 Value
*Vector
= UndefValue::get(VectorType::get(Old
->getType(), Width
));
913 for (int Lane
= 0; Lane
< Width
; Lane
++)
914 Vector
= Builder
.CreateInsertElement(
915 Vector
, getNewValue(Stmt
, Old
, ScalarMaps
[Lane
], VLTS
[Lane
], L
),
916 Builder
.getInt32(Lane
));
918 VectorMap
[Old
] = Vector
;
923 Type
*VectorBlockGenerator::getVectorPtrTy(const Value
*Val
, int Width
) {
924 PointerType
*PointerTy
= dyn_cast
<PointerType
>(Val
->getType());
925 assert(PointerTy
&& "PointerType expected");
927 Type
*ScalarType
= PointerTy
->getElementType();
928 VectorType
*VectorType
= VectorType::get(ScalarType
, Width
);
930 return PointerType::getUnqual(VectorType
);
933 Value
*VectorBlockGenerator::generateStrideOneLoad(
934 ScopStmt
&Stmt
, LoadInst
*Load
, VectorValueMapT
&ScalarMaps
,
935 __isl_keep isl_id_to_ast_expr
*NewAccesses
, bool NegativeStride
= false) {
936 unsigned VectorWidth
= getVectorWidth();
937 auto *Pointer
= Load
->getPointerOperand();
938 Type
*VectorPtrType
= getVectorPtrTy(Pointer
, VectorWidth
);
939 unsigned Offset
= NegativeStride
? VectorWidth
- 1 : 0;
941 Value
*NewPointer
= generateLocationAccessed(Stmt
, Load
, ScalarMaps
[Offset
],
942 VLTS
[Offset
], NewAccesses
);
944 Builder
.CreateBitCast(NewPointer
, VectorPtrType
, "vector_ptr");
946 Builder
.CreateLoad(VectorPtr
, Load
->getName() + "_p_vec_full");
948 VecLoad
->setAlignment(8);
950 if (NegativeStride
) {
951 SmallVector
<Constant
*, 16> Indices
;
952 for (int i
= VectorWidth
- 1; i
>= 0; i
--)
953 Indices
.push_back(ConstantInt::get(Builder
.getInt32Ty(), i
));
954 Constant
*SV
= llvm::ConstantVector::get(Indices
);
955 Value
*RevVecLoad
= Builder
.CreateShuffleVector(
956 VecLoad
, VecLoad
, SV
, Load
->getName() + "_reverse");
// Load a vector for a stride-zero access: every lane reads the same memory
// cell, so a single scalar load is emitted and then splatted across all
// lanes with a zero-index shufflevector.
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  // Width 1: we load a single-element vector and broadcast it afterwards.
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  // All-zero shuffle mask broadcasts element 0 into every lane.
  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
986 Value
*VectorBlockGenerator::generateUnknownStrideLoad(
987 ScopStmt
&Stmt
, LoadInst
*Load
, VectorValueMapT
&ScalarMaps
,
988 __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
989 int VectorWidth
= getVectorWidth();
990 auto *Pointer
= Load
->getPointerOperand();
991 VectorType
*VectorType
= VectorType::get(
992 dyn_cast
<PointerType
>(Pointer
->getType())->getElementType(), VectorWidth
);
994 Value
*Vector
= UndefValue::get(VectorType
);
996 for (int i
= 0; i
< VectorWidth
; i
++) {
997 Value
*NewPointer
= generateLocationAccessed(Stmt
, Load
, ScalarMaps
[i
],
998 VLTS
[i
], NewAccesses
);
1000 Builder
.CreateLoad(NewPointer
, Load
->getName() + "_p_scalar_");
1001 Vector
= Builder
.CreateInsertElement(
1002 Vector
, ScalarLoad
, Builder
.getInt32(i
), Load
->getName() + "_p_vec_");
// Generate code for a LoadInst in a vectorized statement.
//
// Hoisted (preloaded) loads are splatted from GlobalMap.  Loads of types that
// cannot be vector elements are scalarized per lane.  Otherwise the stride of
// the access under the current schedule selects the cheapest vector load:
// stride zero -> splat load, stride one -> wide load, stride minus one ->
// reversed wide load, anything else -> per-lane gather.
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Invariant loads were hoisted out of the SCoP; broadcast the preloaded
  // scalar into all lanes.
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    // The loaded type cannot live in a vector register; emit one scalar load
    // per lane instead.
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl::manage(isl_map_copy(Schedule))))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl::manage(isl_map_copy(Schedule))))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl::manage(isl_map_copy(Schedule)), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}
// Vectorize a unary instruction.  Only casts are supported (asserted below):
// the operand is widened to a vector and the same cast opcode is applied to
// the whole vector at once.
void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}
// Vectorize a binary operator: widen both operands to vectors and emit a
// single vector instruction with the same opcode.
void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
1073 void VectorBlockGenerator::copyStore(
1074 ScopStmt
&Stmt
, StoreInst
*Store
, ValueMapT
&VectorMap
,
1075 VectorValueMapT
&ScalarMaps
, __isl_keep isl_id_to_ast_expr
*NewAccesses
) {
1076 const MemoryAccess
&Access
= Stmt
.getArrayAccessFor(Store
);
1078 auto *Pointer
= Store
->getPointerOperand();
1079 Value
*Vector
= getVectorValue(Stmt
, Store
->getValueOperand(), VectorMap
,
1080 ScalarMaps
, getLoopForStmt(Stmt
));
1082 // Make sure we have scalar values available to access the pointer to
1083 // the data location.
1084 extractScalarValues(Store
, VectorMap
, ScalarMaps
);
1086 if (Access
.isStrideOne(isl::manage(isl_map_copy(Schedule
)))) {
1087 Type
*VectorPtrType
= getVectorPtrTy(Pointer
, getVectorWidth());
1088 Value
*NewPointer
= generateLocationAccessed(Stmt
, Store
, ScalarMaps
[0],
1089 VLTS
[0], NewAccesses
);
1092 Builder
.CreateBitCast(NewPointer
, VectorPtrType
, "vector_ptr");
1093 StoreInst
*Store
= Builder
.CreateStore(Vector
, VectorPtr
);
1096 Store
->setAlignment(8);
1098 for (unsigned i
= 0; i
< ScalarMaps
.size(); i
++) {
1099 Value
*Scalar
= Builder
.CreateExtractElement(Vector
, Builder
.getInt32(i
));
1100 Value
*NewPointer
= generateLocationAccessed(Stmt
, Store
, ScalarMaps
[i
],
1101 VLTS
[i
], NewAccesses
);
1102 Builder
.CreateStore(Scalar
, NewPointer
);
// Return true iff at least one operand of @p Inst already has a vector value
// in @p VectorMap.
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}
// For every operand of @p Inst that exists as a vector value, make the
// per-lane scalar elements available in @p ScalarMaps by emitting
// extractelement instructions.  Returns true iff any operand had a vector
// value.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
// Fallback path: copy @p Inst once per vector lane using the scalar
// BlockGenerator.  If the instruction had vector operands and its result type
// is a valid vector element type, the per-lane results are additionally
// re-packed into a vector so later vector users can consume them.
void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  // Ensure all operands are available lane-wise before scalar copying.
  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}
// The vector width equals the number of per-lane loop-to-SCEV maps.
int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
// Dispatch the vectorized copy of a single instruction: terminators and
// synthesizable values are skipped, loads/stores/casts/binops get dedicated
// vector code paths, and everything else falls back to scalarization.
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}
// Reload demoted scalar (non-array) read accesses of @p Stmt and broadcast
// each reloaded value into a full vector, recorded in @p VectorBlockMap under
// the access value.
void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    // Only scalar reads are reloaded here.
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    // All-zero shuffle mask splats element 0 into every lane.
    Constant *SplatVector = Constant::getNullValue(
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}
// Assert that @p Stmt contains no scalar write accesses; scalar stores are
// not supported inside a vectorized loop.
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}
// Copy a (block) statement as vector code: split off a fresh basic block,
// preload splatted scalar reads, then copy every instruction of the original
// block through the vectorizing dispatcher.
void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time. Or (if the values are calculated using scalar operations), it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}
// Update the dominator tree for the copy of @p BB: look up the copy of BB's
// immediate dominator and, if it was already generated, make it the immediate
// dominator of @p BBCopy.  Returns the start-block copy of the original
// immediate dominator (may be null if not yet copied).
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}
// This is to determine whether an llvm::Value (defined in @p BB) is usable when
// leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
// does not work in cases where the exit block has edges from outside the
// region. In that case the llvm::Value would never be usable in the exit
// block. The RegionGenerator however creates a new exit block ('ExitBBCopy')
// for the subregion's exiting edges only. We need to determine whether an
// llvm::Value is usable in there. We do this by checking whether it dominates
// all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}
// Find the direct dominator of the subregion's exit block if the subregion was
// simplified.  Computed as the nearest common dominator of all exiting blocks
// that belong to the subregion.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}
// Copy a non-affine region statement.
//
// The region's blocks are visited in breadth-first order; each block is
// copied with the scalar BlockGenerator into its own fresh block, dominance
// is repaired incrementally, PHI nodes whose incoming blocks are not yet
// copied are deferred via IncompletePHINodeMap, and finally the region's
// control flow (terminators) is regenerated by hand.  Counting PHIs are
// added to copied loop headers as replacements for SCEVs of the old loops.
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);

  // Edges that enter the region from outside are redirected to the new entry.
  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we store also basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    TerminatorInst *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      // Replace the whole copied block body with a single unreachable.
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}
// Create a PHI in the copied subregion-exit block that merges the incoming
// values recorded on @p MA (a PHI-kind write access).
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    // Materialize the incoming value at the end of its (copied) block.
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}
// Compute the value a scalar write access @p MA leaves the subregion with.
// PHI-kind accesses with several incoming values require an exit PHI; a
// single incoming value or a plain value access is simply remapped.
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must have originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}
// Store all scalar (non-array) write accesses of a region statement, each
// guarded by a runtime check that its access domain is actually executed.
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    // Emit the store under a condition derived from the access domain.
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {

          Value *NewVal = getExitScalar(MA, LTS, BBMap);
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}
// Add the incoming value for @p IncomingBB to the copied PHI @p PHICopy.
// If the incoming block has not been copied yet, the (PHI, PHICopy) pair is
// queued in IncompletePHINodeMap and completed once the block is generated.
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied mark this PHI as incomplete.
  // Once the block will be copied the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHIs incoming block
    // change it, otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}
1652 void RegionGenerator::copyPHIInstruction(ScopStmt
&Stmt
, PHINode
*PHI
,
1654 LoopToScevMapT
<S
) {
1655 unsigned NumIncoming
= PHI
->getNumIncomingValues();
1657 Builder
.CreatePHI(PHI
->getType(), NumIncoming
, "polly." + PHI
->getName());
1658 PHICopy
->moveBefore(PHICopy
->getParent()->getFirstNonPHI());
1659 BBMap
[PHI
] = PHICopy
;
1661 for (BasicBlock
*IncomingBB
: PHI
->blocks())
1662 addOperandToPHI(Stmt
, PHI
, PHICopy
, IncomingBB
, LTS
);