//===- ScopBuilder.cpp ----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "polly/Support/VirtualInstruction.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/EquivalenceClasses.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/RegionIterator.h"
37 #include "llvm/Analysis/ScalarEvolution.h"
38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Use.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the existence
  // of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
///
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                        [<num> x %struct.descriptor_dimension] }
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());
  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}

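/// Try to recover the Fortran array descriptor that an access refers to by
/// following its pointer operand back to a bitcast of malloc'ed memory that is
/// stored into a descriptor struct, i.e. the allocation is visible in the
/// current function.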
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses the malloc'd memory.
  // We are looking for a "store" into a struct with the type being the Fortran
  // descriptor type.
  for (auto user : MallocMem->users()) {
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}

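/// Try to recover the Fortran array descriptor for an access whose base
/// pointer is reloaded from the descriptor itself, i.e. the allocation of the
/// underlying memory is not visible in the current function.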
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}

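/// Try to model the access as a multi-dimensional array access with
/// compile-time constant dimension sizes derived from the GEP index
/// expressions. Returns true if such an access could be created.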
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}

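/// Try to model the access as a multi-dimensional array access with
/// parametric dimension sizes, using the delinearization information collected
/// by ScopDetection for this instruction.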
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}

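/// Model memset/memcpy/memmove intrinsics as (possibly over-approximated)
/// byte-wise array accesses to their destination and source pointers.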
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       it with
  //         isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}

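/// Model the memory effects of a call instruction based on the alias-analysis
/// mod/ref behavior of the called function.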
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
    LLVM_FALLTHROUGH;
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}

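/// Fallback: model the access as a single-dimensional array access relative to
/// its base pointer, downgrading must-writes to may-writes if the access
/// function is not affine.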
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}

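/// Dispatch the construction of a memory access: memory intrinsics and calls
/// are handled first, then the multi-dimensional views, and finally the
/// single-dimensional fallback.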
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}

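/// Build the access functions of every statement and the write accesses for
/// values that escape the SCoP.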
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}

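/// Should an instruction be modeled in a ScopStmt? Terminators, ignored
/// intrinsics and instructions that can be re-synthesized from the polyhedral
/// description do not need to be modeled explicitly.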
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}

/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main of all statement for @p BB. If true,
///               no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}

/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}

void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count++;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}

/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction, such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}

/// Join instructions to the same statement if one uses the scalar result of
/// the other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}

/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging to
/// a different statement (which might as well contain ordered instructions, but
/// this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // Items added to 'SeenLeaders' are leaders, but may have lost their
      // leadership status when merged into another statement.
      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
      if (PrevLeader == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}

/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
/// are in order, we put both into the statement. PHI WRITEs are always executed
/// after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could also
/// be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}

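/// Split a basic block into statements based on scalar independence: modeled
/// instructions are partitioned with a union-find structure such that
/// operand-connected and ordered instructions end up in the same statement.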
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the bb has the
    // same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions per statement (a statement is represented by its
  // leader instruction). The order of statement instructions is reversed such
  // that the epilogue is first. This makes it easier to ensure that the
  // epilogue is the last statement.
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction &Inst : reverse(*BB)) {
    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(&Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  bool MainFound = false;
  for (auto &Instructions : reverse(LeaderToInstList)) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain;
    if (MainInst)
      IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
               InstList.end();
    else
      IsMain = (Count == 0);
    if (IsMain)
      MainFound = true;

    std::reverse(InstList.begin(), InstList.end());
    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}

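/// Create the ScopStmts for a region: non-affine subregions become a single
/// region statement, all other basic blocks are split according to the chosen
/// -polly-stmt-granularity.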
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}

void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and TerminatorInsts that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}

MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}

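/// Create a MemoryKind::Array access, remember its base pointer, and attach a
/// Fortran array descriptor if requested and one can be found.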
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}

void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}

void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one already
    // exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}

void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from subregion each can be the effective written value of the
  // subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}

void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}

void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}

void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}

/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType
getReductionType(const BinaryOperator *BinOp, const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}

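/// Detect reduction-like load/store pairs in @p Stmt: candidate pairs are
/// collected per store and marked as reduction-like if their accesses do not
/// overlap with any other access of the statement.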
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check whether the remaining accesses overlap with other memory
    // accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction-like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}

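/// Collect loads that, together with @p StoreMA, may form a
/// load-binop-store reduction chain.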
void ScopBuilder::collectCandidateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}

void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}

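/// Check that the "physical" use of an operand (derived from the IR) agrees
/// with the "virtual" use derived from the statement's MemoryAccesses.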
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
/// can intentionally influence the kind of uses (not corresponding to the
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
/// to pick up the virtual uses. But here in the scop builder, this has not
/// happened yet, such that virtual and physical uses are equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It can
      // still be synthesizable or load-hoisted, but these kinds of instructions
      // are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
                            OptimizationRemarkEmitter &ORE) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));

  buildStmts(R);

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some statement.
      // It is not important to which statement the MemoryAccess is added
      // because it will later be removed from the ScopStmt again. We chose the
      // first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  scop->buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  scop->buildSchedule(LI);

  scop->finalizeAccesses();

  scop->realignParams();
  scop->addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  scop->addRecordedAssumptions();

  scop->simplifyContexts();
  if (!scop->buildAliasChecks(AA)) {
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  scop->hoistInvariantLoads();
  scop->canonicalizeDynamicBasePtrs();
  scop->verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}

ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC, ORE);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ScopFound++;
    if (scop->getMaxLoopDepth() > 0)
      RichScopFound++;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}