1 //===- ScopBuilder.cpp ----------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the SCoP
13 // detection derived from their LLVM-IR code.
15 //===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "polly/Support/VirtualInstruction.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/EquivalenceClasses.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/RegionIterator.h"
37 #include "llvm/Analysis/ScalarEvolution.h"
38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Use.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
67 using namespace polly
;
69 #define DEBUG_TYPE "polly-scops"
71 STATISTIC(ScopFound
, "Number of valid Scops");
72 STATISTIC(RichScopFound
, "Number of Scops containing a loop");
73 STATISTIC(InfeasibleScops
,
74 "Number of SCoPs with statically infeasible context.");
76 bool polly::ModelReadOnlyScalars
;
78 static cl::opt
<bool, true> XModelReadOnlyScalars(
79 "polly-analyze-read-only-scalars",
80 cl::desc("Model read-only scalar values in the scop description"),
81 cl::location(ModelReadOnlyScalars
), cl::Hidden
, cl::ZeroOrMore
,
82 cl::init(true), cl::cat(PollyCategory
));
84 static cl::opt
<bool> UnprofitableScalarAccs(
85 "polly-unprofitable-scalar-accs",
86 cl::desc("Count statements with scalar accesses as not optimizable"),
87 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
89 static cl::opt
<bool> DetectFortranArrays(
90 "polly-detect-fortran-arrays",
91 cl::desc("Detect Fortran arrays and use this for code generation"),
92 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
94 static cl::opt
<bool> DetectReductions("polly-detect-reductions",
95 cl::desc("Detect and exploit reductions"),
96 cl::Hidden
, cl::ZeroOrMore
,
97 cl::init(true), cl::cat(PollyCategory
));
99 // Multiplicative reductions can be disabled separately as these kind of
100 // operations can overflow easily. Additive reductions and bit operations
101 // are in contrast pretty stable.
102 static cl::opt
<bool> DisableMultiplicativeReductions(
103 "polly-disable-multiplicative-reductions",
104 cl::desc("Disable multiplicative reductions"), cl::Hidden
, cl::ZeroOrMore
,
105 cl::init(false), cl::cat(PollyCategory
));
107 enum class GranularityChoice
{ BasicBlocks
, ScalarIndependence
, Stores
};
109 static cl::opt
<GranularityChoice
> StmtGranularity(
110 "polly-stmt-granularity",
112 "Algorithm to use for splitting basic blocks into multiple statements"),
113 cl::values(clEnumValN(GranularityChoice::BasicBlocks
, "bb",
114 "One statement per basic block"),
115 clEnumValN(GranularityChoice::ScalarIndependence
, "scalar-indep",
116 "Scalar independence heuristic"),
117 clEnumValN(GranularityChoice::Stores
, "store",
118 "Store-level granularity")),
119 cl::init(GranularityChoice::BasicBlocks
), cl::cat(PollyCategory
));
121 void ScopBuilder::buildPHIAccesses(ScopStmt
*PHIStmt
, PHINode
*PHI
,
122 Region
*NonAffineSubRegion
,
124 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
125 // true, are not modeled as ordinary PHI nodes as they are not part of the
126 // region. However, we model the operands in the predecessor blocks that are
127 // part of the region as regular scalar accesses.
129 // If we can synthesize a PHI we can skip it, however only if it is in
130 // the region. If it is not it can only be in the exit block of the region.
131 // In this case we model the operands but not the PHI itself.
132 auto *Scope
= LI
.getLoopFor(PHI
->getParent());
133 if (!IsExitBlock
&& canSynthesize(PHI
, *scop
, &SE
, Scope
))
136 // PHI nodes are modeled as if they had been demoted prior to the SCoP
137 // detection. Hence, the PHI is a load of a new memory location in which the
138 // incoming value was written at the end of the incoming basic block.
139 bool OnlyNonAffineSubRegionOperands
= true;
140 for (unsigned u
= 0; u
< PHI
->getNumIncomingValues(); u
++) {
141 Value
*Op
= PHI
->getIncomingValue(u
);
142 BasicBlock
*OpBB
= PHI
->getIncomingBlock(u
);
143 ScopStmt
*OpStmt
= scop
->getLastStmtFor(OpBB
);
145 // Do not build PHI dependences inside a non-affine subregion, but make
146 // sure that the necessary scalar values are still made available.
147 if (NonAffineSubRegion
&& NonAffineSubRegion
->contains(OpBB
)) {
148 auto *OpInst
= dyn_cast
<Instruction
>(Op
);
149 if (!OpInst
|| !NonAffineSubRegion
->contains(OpInst
))
150 ensureValueRead(Op
, OpStmt
);
154 OnlyNonAffineSubRegionOperands
= false;
155 ensurePHIWrite(PHI
, OpStmt
, OpBB
, Op
, IsExitBlock
);
158 if (!OnlyNonAffineSubRegionOperands
&& !IsExitBlock
) {
159 addPHIReadAccess(PHIStmt
, PHI
);
163 void ScopBuilder::buildScalarDependences(ScopStmt
*UserStmt
,
165 assert(!isa
<PHINode
>(Inst
));
167 // Pull-in required operands.
168 for (Use
&Op
: Inst
->operands())
169 ensureValueRead(Op
.get(), UserStmt
);
172 void ScopBuilder::buildEscapingDependences(Instruction
*Inst
) {
173 // Check for uses of this instruction outside the scop. Because we do not
174 // iterate over such instructions and therefore did not "ensure" the existence
175 // of a write, we must determine such use here.
176 if (scop
->isEscaping(Inst
))
177 ensureValueWrite(Inst
);
180 /// Check that a value is a Fortran Array descriptor.
182 /// We check if V has the following structure:
183 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
184 /// [<num> x %struct.descriptor_dimension] }
187 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
189 /// 1. V's type name starts with "struct.array"
190 /// 2. V's type has layout as shown.
191 /// 3. Final member of V's type has name "struct.descriptor_dimension",
192 /// 4. "struct.descriptor_dimension" has layout as shown.
193 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
195 /// We are interested in such types since this is the code that dragonegg
196 /// generates for Fortran array descriptors.
198 /// @param V the Value to be checked.
200 /// @returns True if V is a Fortran array descriptor, False otherwise.
201 bool isFortranArrayDescriptor(Value
*V
) {
202 PointerType
*PTy
= dyn_cast
<PointerType
>(V
->getType());
207 Type
*Ty
= PTy
->getElementType();
208 assert(Ty
&& "Ty expected to be initialized");
209 auto *StructArrTy
= dyn_cast
<StructType
>(Ty
);
211 if (!(StructArrTy
&& StructArrTy
->hasName()))
214 if (!StructArrTy
->getName().startswith("struct.array"))
217 if (StructArrTy
->getNumElements() != 4)
220 const ArrayRef
<Type
*> ArrMemberTys
= StructArrTy
->elements();
223 if (ArrMemberTys
[0] != Type::getInt8PtrTy(V
->getContext()))
226 // Get a reference to the int type and check that all the members
227 // share the same int type
228 Type
*IntTy
= ArrMemberTys
[1];
229 if (ArrMemberTys
[2] != IntTy
)
232 // type: [<num> x %struct.descriptor_dimension]
233 ArrayType
*DescriptorDimArrayTy
= dyn_cast
<ArrayType
>(ArrMemberTys
[3]);
234 if (!DescriptorDimArrayTy
)
237 // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
238 StructType
*DescriptorDimTy
=
239 dyn_cast
<StructType
>(DescriptorDimArrayTy
->getElementType());
241 if (!(DescriptorDimTy
&& DescriptorDimTy
->hasName()))
244 if (DescriptorDimTy
->getName() != "struct.descriptor_dimension")
247 if (DescriptorDimTy
->getNumElements() != 3)
250 for (auto MemberTy
: DescriptorDimTy
->elements()) {
251 if (MemberTy
!= IntTy
)
258 Value
*ScopBuilder::findFADAllocationVisible(MemAccInst Inst
) {
259 // match: 4.1 & 4.2 store/load
260 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
264 if (Inst
.getAlignment() != 8)
267 Value
*Address
= Inst
.getPointerOperand();
269 const BitCastInst
*Bitcast
= nullptr;
271 if (auto *Slot
= dyn_cast
<GetElementPtrInst
>(Address
)) {
272 Value
*TypedMem
= Slot
->getPointerOperand();
274 Bitcast
= dyn_cast
<BitCastInst
>(TypedMem
);
277 Bitcast
= dyn_cast
<BitCastInst
>(Address
);
283 auto *MallocMem
= Bitcast
->getOperand(0);
286 auto *MallocCall
= dyn_cast
<CallInst
>(MallocMem
);
290 Function
*MallocFn
= MallocCall
->getCalledFunction();
291 if (!(MallocFn
&& MallocFn
->hasName() && MallocFn
->getName() == "malloc"))
294 // Find all uses the malloc'd memory.
295 // We are looking for a "store" into a struct with the type being the Fortran
297 for (auto user
: MallocMem
->users()) {
299 auto *MallocStore
= dyn_cast
<StoreInst
>(user
);
303 auto *DescriptorGEP
=
304 dyn_cast
<GEPOperator
>(MallocStore
->getPointerOperand());
309 auto DescriptorType
=
310 dyn_cast
<StructType
>(DescriptorGEP
->getSourceElementType());
311 if (!(DescriptorType
&& DescriptorType
->hasName()))
314 Value
*Descriptor
= dyn_cast
<Value
>(DescriptorGEP
->getPointerOperand());
319 if (!isFortranArrayDescriptor(Descriptor
))
328 Value
*ScopBuilder::findFADAllocationInvisible(MemAccInst Inst
) {
330 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
333 Value
*Slot
= Inst
.getPointerOperand();
335 LoadInst
*MemLoad
= nullptr;
337 if (auto *SlotGEP
= dyn_cast
<GetElementPtrInst
>(Slot
)) {
339 MemLoad
= dyn_cast
<LoadInst
>(SlotGEP
->getPointerOperand());
342 MemLoad
= dyn_cast
<LoadInst
>(Slot
);
348 auto *BitcastOperator
=
349 dyn_cast
<BitCastOperator
>(MemLoad
->getPointerOperand());
350 if (!BitcastOperator
)
353 Value
*Descriptor
= dyn_cast
<Value
>(BitcastOperator
->getOperand(0));
357 if (!isFortranArrayDescriptor(Descriptor
))
363 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst
, ScopStmt
*Stmt
) {
364 Value
*Val
= Inst
.getValueOperand();
365 Type
*ElementType
= Val
->getType();
366 Value
*Address
= Inst
.getPointerOperand();
367 const SCEV
*AccessFunction
=
368 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
369 const SCEVUnknown
*BasePointer
=
370 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
371 enum MemoryAccess::AccessType AccType
=
372 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
374 if (auto *BitCast
= dyn_cast
<BitCastInst
>(Address
)) {
375 auto *Src
= BitCast
->getOperand(0);
376 auto *SrcTy
= Src
->getType();
377 auto *DstTy
= BitCast
->getType();
378 // Do not try to delinearize non-sized (opaque) pointers.
379 if ((SrcTy
->isPointerTy() && !SrcTy
->getPointerElementType()->isSized()) ||
380 (DstTy
->isPointerTy() && !DstTy
->getPointerElementType()->isSized())) {
383 if (SrcTy
->isPointerTy() && DstTy
->isPointerTy() &&
384 DL
.getTypeAllocSize(SrcTy
->getPointerElementType()) ==
385 DL
.getTypeAllocSize(DstTy
->getPointerElementType()))
389 auto *GEP
= dyn_cast
<GetElementPtrInst
>(Address
);
393 std::vector
<const SCEV
*> Subscripts
;
394 std::vector
<int> Sizes
;
395 std::tie(Subscripts
, Sizes
) = getIndexExpressionsFromGEP(GEP
, SE
);
396 auto *BasePtr
= GEP
->getOperand(0);
398 if (auto *BasePtrCast
= dyn_cast
<BitCastInst
>(BasePtr
))
399 BasePtr
= BasePtrCast
->getOperand(0);
401 // Check for identical base pointers to ensure that we do not miss index
402 // offsets that have been added before this GEP is applied.
403 if (BasePtr
!= BasePointer
->getValue())
406 std::vector
<const SCEV
*> SizesSCEV
;
408 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
410 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
411 for (auto *Subscript
: Subscripts
) {
412 InvariantLoadsSetTy AccessILS
;
413 if (!isAffineExpr(&scop
->getRegion(), SurroundingLoop
, Subscript
, SE
,
417 for (LoadInst
*LInst
: AccessILS
)
418 if (!ScopRIL
.count(LInst
))
425 SizesSCEV
.push_back(nullptr);
428 SizesSCEV
.push_back(SE
.getSCEV(
429 ConstantInt::get(IntegerType::getInt64Ty(BasePtr
->getContext()), V
)));
431 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
432 true, Subscripts
, SizesSCEV
, Val
);
436 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst
, ScopStmt
*Stmt
) {
437 if (!PollyDelinearize
)
440 Value
*Address
= Inst
.getPointerOperand();
441 Value
*Val
= Inst
.getValueOperand();
442 Type
*ElementType
= Val
->getType();
443 unsigned ElementSize
= DL
.getTypeAllocSize(ElementType
);
444 enum MemoryAccess::AccessType AccType
=
445 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
447 const SCEV
*AccessFunction
=
448 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
449 const SCEVUnknown
*BasePointer
=
450 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
452 assert(BasePointer
&& "Could not find base pointer");
454 auto &InsnToMemAcc
= scop
->getInsnToMemAccMap();
455 auto AccItr
= InsnToMemAcc
.find(Inst
);
456 if (AccItr
== InsnToMemAcc
.end())
459 std::vector
<const SCEV
*> Sizes
= {nullptr};
461 Sizes
.insert(Sizes
.end(), AccItr
->second
.Shape
->DelinearizedSizes
.begin(),
462 AccItr
->second
.Shape
->DelinearizedSizes
.end());
464 // In case only the element size is contained in the 'Sizes' array, the
465 // access does not access a real multi-dimensional array. Hence, we allow
466 // the normal single-dimensional access construction to handle this.
467 if (Sizes
.size() == 1)
470 // Remove the element size. This information is already provided by the
471 // ElementSize parameter. In case the element size of this access and the
472 // element size used for delinearization differs the delinearization is
473 // incorrect. Hence, we invalidate the scop.
475 // TODO: Handle delinearization with differing element sizes.
476 auto DelinearizedSize
=
477 cast
<SCEVConstant
>(Sizes
.back())->getAPInt().getSExtValue();
479 if (ElementSize
!= DelinearizedSize
)
480 scop
->invalidate(DELINEARIZATION
, Inst
->getDebugLoc(), Inst
->getParent());
482 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
483 true, AccItr
->second
.DelinearizedSubscripts
, Sizes
, Val
);
487 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst
, ScopStmt
*Stmt
) {
488 auto *MemIntr
= dyn_cast_or_null
<MemIntrinsic
>(Inst
);
490 if (MemIntr
== nullptr)
493 auto *L
= LI
.getLoopFor(Inst
->getParent());
494 auto *LengthVal
= SE
.getSCEVAtScope(MemIntr
->getLength(), L
);
497 // Check if the length val is actually affine or if we overapproximate it
498 InvariantLoadsSetTy AccessILS
;
499 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
501 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
502 bool LengthIsAffine
= isAffineExpr(&scop
->getRegion(), SurroundingLoop
,
503 LengthVal
, SE
, &AccessILS
);
504 for (LoadInst
*LInst
: AccessILS
)
505 if (!ScopRIL
.count(LInst
))
506 LengthIsAffine
= false;
510 auto *DestPtrVal
= MemIntr
->getDest();
513 auto *DestAccFunc
= SE
.getSCEVAtScope(DestPtrVal
, L
);
515 // Ignore accesses to "NULL".
516 // TODO: We could use this to optimize the region further, e.g., intersect
518 // isl_set_complement(isl_set_params(getDomain()))
519 // as we know it would be undefined to execute this instruction anyway.
520 if (DestAccFunc
->isZero())
523 auto *DestPtrSCEV
= dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(DestAccFunc
));
525 DestAccFunc
= SE
.getMinusSCEV(DestAccFunc
, DestPtrSCEV
);
526 addArrayAccess(Stmt
, Inst
, MemoryAccess::MUST_WRITE
, DestPtrSCEV
->getValue(),
527 IntegerType::getInt8Ty(DestPtrVal
->getContext()),
528 LengthIsAffine
, {DestAccFunc
, LengthVal
}, {nullptr},
529 Inst
.getValueOperand());
531 auto *MemTrans
= dyn_cast
<MemTransferInst
>(MemIntr
);
535 auto *SrcPtrVal
= MemTrans
->getSource();
538 auto *SrcAccFunc
= SE
.getSCEVAtScope(SrcPtrVal
, L
);
540 // Ignore accesses to "NULL".
541 // TODO: See above TODO
542 if (SrcAccFunc
->isZero())
545 auto *SrcPtrSCEV
= dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(SrcAccFunc
));
547 SrcAccFunc
= SE
.getMinusSCEV(SrcAccFunc
, SrcPtrSCEV
);
548 addArrayAccess(Stmt
, Inst
, MemoryAccess::READ
, SrcPtrSCEV
->getValue(),
549 IntegerType::getInt8Ty(SrcPtrVal
->getContext()),
550 LengthIsAffine
, {SrcAccFunc
, LengthVal
}, {nullptr},
551 Inst
.getValueOperand());
556 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst
, ScopStmt
*Stmt
) {
557 auto *CI
= dyn_cast_or_null
<CallInst
>(Inst
);
562 if (CI
->doesNotAccessMemory() || isIgnoredIntrinsic(CI
))
565 bool ReadOnly
= false;
566 auto *AF
= SE
.getConstant(IntegerType::getInt64Ty(CI
->getContext()), 0);
567 auto *CalledFunction
= CI
->getCalledFunction();
568 switch (AA
.getModRefBehavior(CalledFunction
)) {
569 case FMRB_UnknownModRefBehavior
:
570 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
571 case FMRB_DoesNotAccessMemory
:
573 case FMRB_DoesNotReadMemory
:
574 case FMRB_OnlyAccessesInaccessibleMem
:
575 case FMRB_OnlyAccessesInaccessibleOrArgMem
:
577 case FMRB_OnlyReadsMemory
:
578 GlobalReads
.emplace_back(Stmt
, CI
);
580 case FMRB_OnlyReadsArgumentPointees
:
583 case FMRB_OnlyAccessesArgumentPointees
: {
584 auto AccType
= ReadOnly
? MemoryAccess::READ
: MemoryAccess::MAY_WRITE
;
585 Loop
*L
= LI
.getLoopFor(Inst
->getParent());
586 for (const auto &Arg
: CI
->arg_operands()) {
587 if (!Arg
->getType()->isPointerTy())
590 auto *ArgSCEV
= SE
.getSCEVAtScope(Arg
, L
);
591 if (ArgSCEV
->isZero())
594 auto *ArgBasePtr
= cast
<SCEVUnknown
>(SE
.getPointerBase(ArgSCEV
));
595 addArrayAccess(Stmt
, Inst
, AccType
, ArgBasePtr
->getValue(),
596 ArgBasePtr
->getType(), false, {AF
}, {nullptr}, CI
);
605 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst
, ScopStmt
*Stmt
) {
606 Value
*Address
= Inst
.getPointerOperand();
607 Value
*Val
= Inst
.getValueOperand();
608 Type
*ElementType
= Val
->getType();
609 enum MemoryAccess::AccessType AccType
=
610 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
612 const SCEV
*AccessFunction
=
613 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
614 const SCEVUnknown
*BasePointer
=
615 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
617 assert(BasePointer
&& "Could not find base pointer");
618 AccessFunction
= SE
.getMinusSCEV(AccessFunction
, BasePointer
);
620 // Check if the access depends on a loop contained in a non-affine subregion.
621 bool isVariantInNonAffineLoop
= false;
622 SetVector
<const Loop
*> Loops
;
623 findLoops(AccessFunction
, Loops
);
624 for (const Loop
*L
: Loops
)
625 if (Stmt
->contains(L
)) {
626 isVariantInNonAffineLoop
= true;
630 InvariantLoadsSetTy AccessILS
;
632 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
633 bool IsAffine
= !isVariantInNonAffineLoop
&&
634 isAffineExpr(&scop
->getRegion(), SurroundingLoop
,
635 AccessFunction
, SE
, &AccessILS
);
637 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
638 for (LoadInst
*LInst
: AccessILS
)
639 if (!ScopRIL
.count(LInst
))
642 if (!IsAffine
&& AccType
== MemoryAccess::MUST_WRITE
)
643 AccType
= MemoryAccess::MAY_WRITE
;
645 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
646 IsAffine
, {AccessFunction
}, {nullptr}, Val
);
649 void ScopBuilder::buildMemoryAccess(MemAccInst Inst
, ScopStmt
*Stmt
) {
650 if (buildAccessMemIntrinsic(Inst
, Stmt
))
653 if (buildAccessCallInst(Inst
, Stmt
))
656 if (buildAccessMultiDimFixed(Inst
, Stmt
))
659 if (buildAccessMultiDimParam(Inst
, Stmt
))
662 buildAccessSingleDim(Inst
, Stmt
);
665 void ScopBuilder::buildAccessFunctions() {
666 for (auto &Stmt
: *scop
) {
667 if (Stmt
.isBlockStmt()) {
668 buildAccessFunctions(&Stmt
, *Stmt
.getBasicBlock());
672 Region
*R
= Stmt
.getRegion();
673 for (BasicBlock
*BB
: R
->blocks())
674 buildAccessFunctions(&Stmt
, *BB
, R
);
677 // Build write accesses for values that are used after the SCoP.
678 // The instructions defining them might be synthesizable and therefore not
679 // contained in any statement, hence we iterate over the original instructions
680 // to identify all escaping values.
681 for (BasicBlock
*BB
: scop
->getRegion().blocks()) {
682 for (Instruction
&Inst
: *BB
)
683 buildEscapingDependences(&Inst
);
687 bool ScopBuilder::shouldModelInst(Instruction
*Inst
, Loop
*L
) {
688 return !isa
<TerminatorInst
>(Inst
) && !isIgnoredIntrinsic(Inst
) &&
689 !canSynthesize(Inst
, *scop
, &SE
, L
);
692 /// Generate a name for a statement.
694 /// @param S The parent SCoP.
695 /// @param BB The basic block the statement will represent.
696 /// @param Count The index of the created statement in @p BB.
697 static std::string
makeStmtName(Scop
*S
, BasicBlock
*BB
, int Count
) {
698 std::string Suffix
= "";
700 Suffix
+= std::to_string(Count
);
701 return getIslCompatibleName("Stmt", BB
, S
->getNextStmtIdx(), Suffix
,
702 UseInstructionNames
);
705 /// Generate a name for a statement that represents a non-affine subregion.
707 /// @param S The parent SCoP.
708 /// @param R The region the statement will represent.
709 static std::string
makeStmtName(Scop
*S
, Region
*R
) {
710 return getIslCompatibleName("Stmt", R
->getNameStr(), S
->getNextStmtIdx(), "",
711 UseInstructionNames
);
714 void ScopBuilder::buildSequentialBlockStmts(BasicBlock
*BB
, bool SplitOnStore
) {
715 Loop
*SurroundingLoop
= LI
.getLoopFor(BB
);
718 std::vector
<Instruction
*> Instructions
;
719 for (Instruction
&Inst
: *BB
) {
720 if (shouldModelInst(&Inst
, SurroundingLoop
))
721 Instructions
.push_back(&Inst
);
722 if (Inst
.getMetadata("polly_split_after") ||
723 (SplitOnStore
&& isa
<StoreInst
>(Inst
))) {
724 std::string Name
= makeStmtName(scop
.get(), BB
, Count
);
725 scop
->addScopStmt(BB
, Name
, SurroundingLoop
, Instructions
);
727 Instructions
.clear();
731 std::string Name
= makeStmtName(scop
.get(), BB
, Count
);
732 scop
->addScopStmt(BB
, Name
, SurroundingLoop
, Instructions
);
735 /// Is @p Inst an ordered instruction?
737 /// An unordered instruction is an instruction, such that a sequence of
738 /// unordered instructions can be permuted without changing semantics. Any
739 /// instruction for which this is not always the case is ordered.
740 static bool isOrderedInstruction(Instruction
*Inst
) {
741 return Inst
->mayHaveSideEffects() || Inst
->mayReadOrWriteMemory();
744 /// Join instructions to the same statement if one uses the scalar result of the
746 static void joinOperandTree(EquivalenceClasses
<Instruction
*> &UnionFind
,
747 ArrayRef
<Instruction
*> ModeledInsts
) {
748 for (Instruction
*Inst
: ModeledInsts
) {
749 if (isa
<PHINode
>(Inst
))
752 for (Use
&Op
: Inst
->operands()) {
753 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
.get());
757 // Check if OpInst is in the BB and is a modeled instruction.
758 auto OpVal
= UnionFind
.findValue(OpInst
);
759 if (OpVal
== UnionFind
.end())
762 UnionFind
.unionSets(Inst
, OpInst
);
767 /// Join instructions that are used as incoming value in successor PHIs into the
770 joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses
<Instruction
*> &UnionFind
,
771 ArrayRef
<Instruction
*> ModeledInsts
,
773 for (BasicBlock
*Succ
: successors(BB
)) {
774 for (Instruction
&SuccInst
: *Succ
) {
775 PHINode
*SuccPHI
= dyn_cast
<PHINode
>(&SuccInst
);
779 Value
*IncomingVal
= SuccPHI
->getIncomingValueForBlock(BB
);
780 Instruction
*IncomingInst
= dyn_cast
<Instruction
>(IncomingVal
);
783 if (IncomingInst
->getParent() != BB
)
785 if (UnionFind
.findValue(IncomingInst
) == UnionFind
.end())
788 UnionFind
.unionSets(nullptr, IncomingInst
);
793 /// Ensure that the order of ordered instructions does not change.
795 /// If we encounter an ordered instruction enclosed in instructions belonging to
796 /// a different statement (which might as well contain ordered instructions, but
797 /// this is not tested here), join them.
799 joinOrderedInstructions(EquivalenceClasses
<Instruction
*> &UnionFind
,
800 ArrayRef
<Instruction
*> ModeledInsts
) {
801 SetVector
<Instruction
*> SeenLeaders
;
802 for (Instruction
*Inst
: ModeledInsts
) {
803 if (!isOrderedInstruction(Inst
))
806 Instruction
*Leader
= UnionFind
.getLeaderValue(Inst
);
807 bool Inserted
= SeenLeaders
.insert(Leader
);
811 // Merge statements to close holes. Say, we have already seen statements A
812 // and B, in this order. Then we see an instruction of A again and we would
813 // see the pattern "A B A". This function joins all statements until the
814 // only seen occurrence of A.
815 for (Instruction
*Prev
: reverse(SeenLeaders
)) {
816 // Items added to 'SeenLeaders' are leaders, but may have lost their
817 // leadership status when merged into another statement.
818 Instruction
*PrevLeader
= UnionFind
.getLeaderValue(SeenLeaders
.back());
819 if (PrevLeader
== Leader
)
821 UnionFind
.unionSets(Prev
, Leader
);
826 /// Also ensure that the epilogue is the last statement relative to all ordered
829 /// This is basically joinOrderedInstructions() but using the epilogue as
830 /// 'ordered instruction'.
831 static void joinAllAfterEpilogue(EquivalenceClasses
<Instruction
*> &UnionFind
,
832 ArrayRef
<Instruction
*> ModeledInsts
) {
833 bool EpilogueSeen
= false;
834 for (Instruction
*Inst
: ModeledInsts
) {
835 auto PHIWritesLeader
= UnionFind
.findLeader(nullptr);
836 auto InstLeader
= UnionFind
.findLeader(Inst
);
838 if (PHIWritesLeader
== InstLeader
)
841 if (!isOrderedInstruction(Inst
))
845 UnionFind
.unionSets(PHIWritesLeader
, InstLeader
);
849 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock
*BB
) {
850 Loop
*L
= LI
.getLoopFor(BB
);
852 // Extracting out modeled instructions saves us from checking
853 // shouldModelInst() repeatedly.
854 SmallVector
<Instruction
*, 32> ModeledInsts
;
855 EquivalenceClasses
<Instruction
*> UnionFind
;
856 for (Instruction
&Inst
: *BB
) {
857 if (!shouldModelInst(&Inst
, L
))
859 ModeledInsts
.push_back(&Inst
);
860 UnionFind
.insert(&Inst
);
863 // 'nullptr' represents the last statement for a basic block. It contains no
864 // instructions, but holds the PHI write accesses for successor basic blocks.
865 // If a PHI has an incoming value defined in this BB, it can also be merged
866 // with other statements.
867 // TODO: We wouldn't need this if we would add PHIWrites into the statement
868 // that defines the incoming value (if in the BB) instead of always the last,
869 // so we could unconditionally always add a last statement.
870 UnionFind
.insert(nullptr);
872 joinOperandTree(UnionFind
, ModeledInsts
);
873 joinIncomingPHIValuesIntoEpilogue(UnionFind
, ModeledInsts
, BB
);
874 joinOrderedInstructions(UnionFind
, ModeledInsts
);
875 joinAllAfterEpilogue(UnionFind
, ModeledInsts
);
877 // The list of instructions for statement (statement represented by the leader
878 // instruction). The order of statements instructions is reversed such that
879 // the epilogue is first. This makes it easier to ensure that the epilogue is
880 // the last statement.
881 MapVector
<Instruction
*, std::vector
<Instruction
*>> LeaderToInstList
;
883 // Ensure that the epilogue is last.
884 LeaderToInstList
[nullptr];
886 // Collect the instructions of all leaders. UnionFind's member iterator
887 // unfortunately are not in any specific order.
888 for (Instruction
&Inst
: reverse(*BB
)) {
889 auto LeaderIt
= UnionFind
.findLeader(&Inst
);
890 if (LeaderIt
== UnionFind
.member_end())
893 std::vector
<Instruction
*> &InstList
= LeaderToInstList
[*LeaderIt
];
894 InstList
.push_back(&Inst
);
897 // Finally build the statements.
899 for (auto &Instructions
: reverse(LeaderToInstList
)) {
900 std::vector
<Instruction
*> &InstList
= Instructions
.second
;
901 std::reverse(InstList
.begin(), InstList
.end());
902 std::string Name
= makeStmtName(scop
.get(), BB
, Count
);
903 scop
->addScopStmt(BB
, Name
, L
, std::move(InstList
));
908 void ScopBuilder::buildStmts(Region
&SR
) {
909 if (scop
->isNonAffineSubRegion(&SR
)) {
910 std::vector
<Instruction
*> Instructions
;
911 Loop
*SurroundingLoop
=
912 getFirstNonBoxedLoopFor(SR
.getEntry(), LI
, scop
->getBoxedLoops());
913 for (Instruction
&Inst
: *SR
.getEntry())
914 if (shouldModelInst(&Inst
, SurroundingLoop
))
915 Instructions
.push_back(&Inst
);
916 std::string Name
= makeStmtName(scop
.get(), &SR
);
917 scop
->addScopStmt(&SR
, Name
, SurroundingLoop
, Instructions
);
921 for (auto I
= SR
.element_begin(), E
= SR
.element_end(); I
!= E
; ++I
)
922 if (I
->isSubRegion())
923 buildStmts(*I
->getNodeAs
<Region
>());
925 BasicBlock
*BB
= I
->getNodeAs
<BasicBlock
>();
926 switch (StmtGranularity
) {
927 case GranularityChoice::BasicBlocks
:
928 buildSequentialBlockStmts(BB
);
930 case GranularityChoice::ScalarIndependence
:
931 buildEqivClassBlockStmts(BB
);
933 case GranularityChoice::Stores
:
934 buildSequentialBlockStmts(BB
, true);
940 void ScopBuilder::buildAccessFunctions(ScopStmt
*Stmt
, BasicBlock
&BB
,
941 Region
*NonAffineSubRegion
) {
944 "The exit BB is the only one that cannot be represented by a statement");
945 assert(Stmt
->represents(&BB
));
947 // We do not build access functions for error blocks, as they may contain
948 // instructions we can not model.
949 if (isErrorBlock(BB
, scop
->getRegion(), LI
, DT
))
952 auto BuildAccessesForInst
= [this, Stmt
,
953 NonAffineSubRegion
](Instruction
*Inst
) {
954 PHINode
*PHI
= dyn_cast
<PHINode
>(Inst
);
956 buildPHIAccesses(Stmt
, PHI
, NonAffineSubRegion
, false);
958 if (auto MemInst
= MemAccInst::dyn_cast(*Inst
)) {
959 assert(Stmt
&& "Cannot build access function in non-existing statement");
960 buildMemoryAccess(MemInst
, Stmt
);
963 // PHI nodes have already been modeled above and TerminatorInsts that are
964 // not part of a non-affine subregion are fully modeled and regenerated
965 // from the polyhedral domains. Hence, they do not need to be modeled as
966 // explicit data dependences.
968 buildScalarDependences(Stmt
, Inst
);
971 const InvariantLoadsSetTy
&RIL
= scop
->getRequiredInvariantLoads();
972 bool IsEntryBlock
= (Stmt
->getEntryBlock() == &BB
);
974 for (Instruction
*Inst
: Stmt
->getInstructions())
975 BuildAccessesForInst(Inst
);
976 if (Stmt
->isRegionStmt())
977 BuildAccessesForInst(BB
.getTerminator());
979 for (Instruction
&Inst
: BB
) {
980 if (isIgnoredIntrinsic(&Inst
))
983 // Invariant loads already have been processed.
984 if (isa
<LoadInst
>(Inst
) && RIL
.count(cast
<LoadInst
>(&Inst
)))
987 BuildAccessesForInst(&Inst
);
992 MemoryAccess
*ScopBuilder::addMemoryAccess(
993 ScopStmt
*Stmt
, Instruction
*Inst
, MemoryAccess::AccessType AccType
,
994 Value
*BaseAddress
, Type
*ElementType
, bool Affine
, Value
*AccessValue
,
995 ArrayRef
<const SCEV
*> Subscripts
, ArrayRef
<const SCEV
*> Sizes
,
997 bool isKnownMustAccess
= false;
999 // Accesses in single-basic block statements are always executed.
1000 if (Stmt
->isBlockStmt())
1001 isKnownMustAccess
= true;
1003 if (Stmt
->isRegionStmt()) {
1004 // Accesses that dominate the exit block of a non-affine region are always
1005 // executed. In non-affine regions there may exist MemoryKind::Values that
1006 // do not dominate the exit. MemoryKind::Values will always dominate the
1007 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
1008 // non-affine region.
1009 if (Inst
&& DT
.dominates(Inst
->getParent(), Stmt
->getRegion()->getExit()))
1010 isKnownMustAccess
= true;
1013 // Non-affine PHI writes do not "happen" at a particular instruction, but
1014 // after exiting the statement. Therefore they are guaranteed to execute and
1015 // overwrite the old value.
1016 if (Kind
== MemoryKind::PHI
|| Kind
== MemoryKind::ExitPHI
)
1017 isKnownMustAccess
= true;
1019 if (!isKnownMustAccess
&& AccType
== MemoryAccess::MUST_WRITE
)
1020 AccType
= MemoryAccess::MAY_WRITE
;
1022 auto *Access
= new MemoryAccess(Stmt
, Inst
, AccType
, BaseAddress
, ElementType
,
1023 Affine
, Subscripts
, Sizes
, AccessValue
, Kind
);
1025 scop
->addAccessFunction(Access
);
1026 Stmt
->addAccess(Access
);
1030 void ScopBuilder::addArrayAccess(ScopStmt
*Stmt
, MemAccInst MemAccInst
,
1031 MemoryAccess::AccessType AccType
,
1032 Value
*BaseAddress
, Type
*ElementType
,
1034 ArrayRef
<const SCEV
*> Subscripts
,
1035 ArrayRef
<const SCEV
*> Sizes
,
1036 Value
*AccessValue
) {
1037 ArrayBasePointers
.insert(BaseAddress
);
1038 auto *MemAccess
= addMemoryAccess(Stmt
, MemAccInst
, AccType
, BaseAddress
,
1039 ElementType
, IsAffine
, AccessValue
,
1040 Subscripts
, Sizes
, MemoryKind::Array
);
1042 if (!DetectFortranArrays
)
1045 if (Value
*FAD
= findFADAllocationInvisible(MemAccInst
))
1046 MemAccess
->setFortranArrayDescriptor(FAD
);
1047 else if (Value
*FAD
= findFADAllocationVisible(MemAccInst
))
1048 MemAccess
->setFortranArrayDescriptor(FAD
);
1051 void ScopBuilder::ensureValueWrite(Instruction
*Inst
) {
1052 // Find the statement that defines the value of Inst. That statement has to
1053 // write the value to make it available to those statements that read it.
1054 ScopStmt
*Stmt
= scop
->getStmtFor(Inst
);
1056 // It is possible that the value is synthesizable within a loop (such that it
1057 // is not part of any statement), but not after the loop (where you need the
1058 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1059 // avoid this. In case the IR has no such PHI, use the last statement (where
1060 // the value is synthesizable) to write the value.
1062 Stmt
= scop
->getLastStmtFor(Inst
->getParent());
1064 // Inst not defined within this SCoP.
1068 // Do not process further if the instruction is already written.
1069 if (Stmt
->lookupValueWriteOf(Inst
))
1072 addMemoryAccess(Stmt
, Inst
, MemoryAccess::MUST_WRITE
, Inst
, Inst
->getType(),
1073 true, Inst
, ArrayRef
<const SCEV
*>(),
1074 ArrayRef
<const SCEV
*>(), MemoryKind::Value
);
1077 void ScopBuilder::ensureValueRead(Value
*V
, ScopStmt
*UserStmt
) {
1078 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1079 // to be able to replace this one. Currently, there is a split responsibility.
1080 // In a first step, the MemoryAccess is created, but without the
1081 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1082 // AccessRelation is created. At least for scalar accesses, there is no new
1083 // information available at ScopStmt::buildAccessRelations(), so we could
1084 // create the AccessRelation right away. This is what
1085 // ScopStmt::ensureValueRead(Value*) does.
1087 auto *Scope
= UserStmt
->getSurroundingLoop();
1088 auto VUse
= VirtualUse::create(scop
.get(), UserStmt
, Scope
, V
, false);
1089 switch (VUse
.getKind()) {
1090 case VirtualUse::Constant
:
1091 case VirtualUse::Block
:
1092 case VirtualUse::Synthesizable
:
1093 case VirtualUse::Hoisted
:
1094 case VirtualUse::Intra
:
1095 // Uses of these kinds do not need a MemoryAccess.
1098 case VirtualUse::ReadOnly
:
1099 // Add MemoryAccess for invariant values only if requested.
1100 if (!ModelReadOnlyScalars
)
1104 case VirtualUse::Inter
:
1106 // Do not create another MemoryAccess for reloading the value if one already
1108 if (UserStmt
->lookupValueReadOf(V
))
1111 addMemoryAccess(UserStmt
, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1112 true, V
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1115 // Inter-statement uses need to write the value in their defining statement.
1117 ensureValueWrite(cast
<Instruction
>(V
));
1122 void ScopBuilder::ensurePHIWrite(PHINode
*PHI
, ScopStmt
*IncomingStmt
,
1123 BasicBlock
*IncomingBlock
,
1124 Value
*IncomingValue
, bool IsExitBlock
) {
1125 // As the incoming block might turn out to be an error statement ensure we
1126 // will create an exit PHI SAI object. It is needed during code generation
1127 // and would be created later anyway.
1129 scop
->getOrCreateScopArrayInfo(PHI
, PHI
->getType(), {},
1130 MemoryKind::ExitPHI
);
1132 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1133 // from outside the SCoP's region have no statement representation.
1137 // Take care for the incoming value being available in the incoming block.
1138 // This must be done before the check for multiple PHI writes because multiple
1139 // exiting edges from subregion each can be the effective written value of the
1140 // subregion. As such, all of them must be made available in the subregion
1142 ensureValueRead(IncomingValue
, IncomingStmt
);
1144 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1145 if (MemoryAccess
*Acc
= IncomingStmt
->lookupPHIWriteOf(PHI
)) {
1146 assert(Acc
->getAccessInstruction() == PHI
);
1147 Acc
->addIncoming(IncomingBlock
, IncomingValue
);
1151 MemoryAccess
*Acc
= addMemoryAccess(
1152 IncomingStmt
, PHI
, MemoryAccess::MUST_WRITE
, PHI
, PHI
->getType(), true,
1153 PHI
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1154 IsExitBlock
? MemoryKind::ExitPHI
: MemoryKind::PHI
);
1156 Acc
->addIncoming(IncomingBlock
, IncomingValue
);
1159 void ScopBuilder::addPHIReadAccess(ScopStmt
*PHIStmt
, PHINode
*PHI
) {
1160 addMemoryAccess(PHIStmt
, PHI
, MemoryAccess::READ
, PHI
, PHI
->getType(), true,
1161 PHI
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1165 void ScopBuilder::buildDomain(ScopStmt
&Stmt
) {
1166 isl::id Id
= isl::id::alloc(scop
->getIslCtx(), Stmt
.getBaseName(), &Stmt
);
1168 Stmt
.Domain
= scop
->getDomainConditions(&Stmt
);
1169 Stmt
.Domain
= Stmt
.Domain
.set_tuple_id(Id
);
1172 void ScopBuilder::collectSurroundingLoops(ScopStmt
&Stmt
) {
1173 isl::set Domain
= Stmt
.getDomain();
1174 for (unsigned u
= 0, e
= Domain
.dim(isl::dim::set
); u
< e
; u
++) {
1175 isl::id DimId
= Domain
.get_dim_id(isl::dim::set
, u
);
1176 Stmt
.NestLoops
.push_back(static_cast<Loop
*>(DimId
.get_user()));
1180 /// Return the reduction type for a given binary operator.
1181 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
1182 const Instruction
*Load
) {
1184 return MemoryAccess::RT_NONE
;
1185 switch (BinOp
->getOpcode()) {
1186 case Instruction::FAdd
:
1187 if (!BinOp
->isFast())
1188 return MemoryAccess::RT_NONE
;
1190 case Instruction::Add
:
1191 return MemoryAccess::RT_ADD
;
1192 case Instruction::Or
:
1193 return MemoryAccess::RT_BOR
;
1194 case Instruction::Xor
:
1195 return MemoryAccess::RT_BXOR
;
1196 case Instruction::And
:
1197 return MemoryAccess::RT_BAND
;
1198 case Instruction::FMul
:
1199 if (!BinOp
->isFast())
1200 return MemoryAccess::RT_NONE
;
1202 case Instruction::Mul
:
1203 if (DisableMultiplicativeReductions
)
1204 return MemoryAccess::RT_NONE
;
1205 return MemoryAccess::RT_MUL
;
1207 return MemoryAccess::RT_NONE
;
1211 void ScopBuilder::checkForReductions(ScopStmt
&Stmt
) {
1212 SmallVector
<MemoryAccess
*, 2> Loads
;
1213 SmallVector
<std::pair
<MemoryAccess
*, MemoryAccess
*>, 4> Candidates
;
1215 // First collect candidate load-store reduction chains by iterating over all
1216 // stores and collecting possible reduction loads.
1217 for (MemoryAccess
*StoreMA
: Stmt
) {
1218 if (StoreMA
->isRead())
1222 collectCandidateReductionLoads(StoreMA
, Loads
);
1223 for (MemoryAccess
*LoadMA
: Loads
)
1224 Candidates
.push_back(std::make_pair(LoadMA
, StoreMA
));
1227 // Then check each possible candidate pair.
1228 for (const auto &CandidatePair
: Candidates
) {
1230 isl::map LoadAccs
= CandidatePair
.first
->getAccessRelation();
1231 isl::map StoreAccs
= CandidatePair
.second
->getAccessRelation();
1233 // Skip those with obviously unequal base addresses.
1234 if (!LoadAccs
.has_equal_space(StoreAccs
)) {
1238 // And check if the remaining for overlap with other memory accesses.
1239 isl::map AllAccsRel
= LoadAccs
.unite(StoreAccs
);
1240 AllAccsRel
= AllAccsRel
.intersect_domain(Stmt
.getDomain());
1241 isl::set AllAccs
= AllAccsRel
.range();
1243 for (MemoryAccess
*MA
: Stmt
) {
1244 if (MA
== CandidatePair
.first
|| MA
== CandidatePair
.second
)
1248 MA
->getAccessRelation().intersect_domain(Stmt
.getDomain());
1249 isl::set Accs
= AccRel
.range();
1251 if (AllAccs
.has_equal_space(Accs
)) {
1252 isl::set OverlapAccs
= Accs
.intersect(AllAccs
);
1253 Valid
= Valid
&& OverlapAccs
.is_empty();
1260 const LoadInst
*Load
=
1261 dyn_cast
<const LoadInst
>(CandidatePair
.first
->getAccessInstruction());
1262 MemoryAccess::ReductionType RT
=
1263 getReductionType(dyn_cast
<BinaryOperator
>(Load
->user_back()), Load
);
1265 // If no overlapping access was found we mark the load and store as
1267 CandidatePair
.first
->markAsReductionLike(RT
);
1268 CandidatePair
.second
->markAsReductionLike(RT
);
1272 void ScopBuilder::collectCandidateReductionLoads(
1273 MemoryAccess
*StoreMA
, SmallVectorImpl
<MemoryAccess
*> &Loads
) {
1274 ScopStmt
*Stmt
= StoreMA
->getStatement();
1276 auto *Store
= dyn_cast
<StoreInst
>(StoreMA
->getAccessInstruction());
1280 // Skip if there is not one binary operator between the load and the store
1281 auto *BinOp
= dyn_cast
<BinaryOperator
>(Store
->getValueOperand());
1285 // Skip if the binary operators has multiple uses
1286 if (BinOp
->getNumUses() != 1)
1289 // Skip if the opcode of the binary operator is not commutative/associative
1290 if (!BinOp
->isCommutative() || !BinOp
->isAssociative())
1293 // Skip if the binary operator is outside the current SCoP
1294 if (BinOp
->getParent() != Store
->getParent())
1297 // Skip if it is a multiplicative reduction and we disabled them
1298 if (DisableMultiplicativeReductions
&&
1299 (BinOp
->getOpcode() == Instruction::Mul
||
1300 BinOp
->getOpcode() == Instruction::FMul
))
1303 // Check the binary operator operands for a candidate load
1304 auto *PossibleLoad0
= dyn_cast
<LoadInst
>(BinOp
->getOperand(0));
1305 auto *PossibleLoad1
= dyn_cast
<LoadInst
>(BinOp
->getOperand(1));
1306 if (!PossibleLoad0
&& !PossibleLoad1
)
1309 // A load is only a candidate if it cannot escape (thus has only this use)
1310 if (PossibleLoad0
&& PossibleLoad0
->getNumUses() == 1)
1311 if (PossibleLoad0
->getParent() == Store
->getParent())
1312 Loads
.push_back(&Stmt
->getArrayAccessFor(PossibleLoad0
));
1313 if (PossibleLoad1
&& PossibleLoad1
->getNumUses() == 1)
1314 if (PossibleLoad1
->getParent() == Store
->getParent())
1315 Loads
.push_back(&Stmt
->getArrayAccessFor(PossibleLoad1
));
1318 void ScopBuilder::buildAccessRelations(ScopStmt
&Stmt
) {
1319 for (MemoryAccess
*Access
: Stmt
.MemAccs
) {
1320 Type
*ElementType
= Access
->getElementType();
1323 if (Access
->isPHIKind())
1324 Ty
= MemoryKind::PHI
;
1325 else if (Access
->isExitPHIKind())
1326 Ty
= MemoryKind::ExitPHI
;
1327 else if (Access
->isValueKind())
1328 Ty
= MemoryKind::Value
;
1330 Ty
= MemoryKind::Array
;
1332 auto *SAI
= scop
->getOrCreateScopArrayInfo(Access
->getOriginalBaseAddr(),
1333 ElementType
, Access
->Sizes
, Ty
);
1334 Access
->buildAccessRelation(SAI
);
1335 scop
->addAccessData(Access
);
1340 static void verifyUse(Scop
*S
, Use
&Op
, LoopInfo
&LI
) {
1341 auto PhysUse
= VirtualUse::create(S
, Op
, &LI
, false);
1342 auto VirtUse
= VirtualUse::create(S
, Op
, &LI
, true);
1343 assert(PhysUse
.getKind() == VirtUse
.getKind());
1346 /// Check the consistency of every statement's MemoryAccesses.
1348 /// The check is carried out by expecting the "physical" kind of use (derived
1349 /// from the BasicBlocks instructions resides in) to be same as the "virtual"
1350 /// kind of use (derived from a statement's MemoryAccess).
1352 /// The "physical" uses are taken by ensureValueRead to determine whether to
1353 /// create MemoryAccesses. When done, the kind of scalar access should be the
1354 /// same no matter which way it was derived.
1356 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1357 /// can intentionally influence on the kind of uses (not corresponding to the
1358 /// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1359 /// to pick up the virtual uses. But here in the code generator, this has not
1360 /// happened yet, such that virtual and physical uses are equivalent.
1361 static void verifyUses(Scop
*S
, LoopInfo
&LI
, DominatorTree
&DT
) {
1362 for (auto *BB
: S
->getRegion().blocks()) {
1363 for (auto &Inst
: *BB
) {
1364 auto *Stmt
= S
->getStmtFor(&Inst
);
1368 if (isIgnoredIntrinsic(&Inst
))
1371 // Branch conditions are encoded in the statement domains.
1372 if (isa
<TerminatorInst
>(&Inst
) && Stmt
->isBlockStmt())
1376 for (auto &Op
: Inst
.operands())
1377 verifyUse(S
, Op
, LI
);
1379 // Stores do not produce values used by other statements.
1380 if (isa
<StoreInst
>(Inst
))
1383 // For every value defined in the block, also check that a use of that
1384 // value in the same statement would not be an inter-statement use. It can
1385 // still be synthesizable or load-hoisted, but these kind of instructions
1386 // are not directly copied in code-generation.
1388 VirtualUse::create(S
, Stmt
, Stmt
->getSurroundingLoop(), &Inst
, true);
1389 assert(VirtDef
.getKind() == VirtualUse::Synthesizable
||
1390 VirtDef
.getKind() == VirtualUse::Intra
||
1391 VirtDef
.getKind() == VirtualUse::Hoisted
);
1395 if (S
->hasSingleExitEdge())
1398 // PHINodes in the SCoP region's exit block are also uses to be checked.
1399 if (!S
->getRegion().isTopLevelRegion()) {
1400 for (auto &Inst
: *S
->getRegion().getExit()) {
1401 if (!isa
<PHINode
>(Inst
))
1404 for (auto &Op
: Inst
.operands())
1405 verifyUse(S
, Op
, LI
);
1411 /// Return the block that is the representing block for @p RN.
1412 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
1413 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
1414 : RN
->getNodeAs
<BasicBlock
>();
1417 void ScopBuilder::buildScop(Region
&R
, AssumptionCache
&AC
,
1418 OptimizationRemarkEmitter
&ORE
) {
1419 scop
.reset(new Scop(R
, SE
, LI
, DT
, *SD
.getDetectionContext(&R
), ORE
));
1423 // Create all invariant load instructions first. These are categorized as
1424 // 'synthesizable', therefore are not part of any ScopStmt but need to be
1425 // created somewhere.
1426 const InvariantLoadsSetTy
&RIL
= scop
->getRequiredInvariantLoads();
1427 for (BasicBlock
*BB
: scop
->getRegion().blocks()) {
1428 if (isErrorBlock(*BB
, scop
->getRegion(), LI
, DT
))
1431 for (Instruction
&Inst
: *BB
) {
1432 LoadInst
*Load
= dyn_cast
<LoadInst
>(&Inst
);
1436 if (!RIL
.count(Load
))
1439 // Invariant loads require a MemoryAccess to be created in some statement.
1440 // It is not important to which statement the MemoryAccess is added
1441 // because it will later be removed from the ScopStmt again. We chose the
1442 // first statement of the basic block the LoadInst is in.
1443 ArrayRef
<ScopStmt
*> List
= scop
->getStmtListFor(BB
);
1444 assert(!List
.empty());
1445 ScopStmt
*RILStmt
= List
.front();
1446 buildMemoryAccess(Load
, RILStmt
);
1449 buildAccessFunctions();
1451 // In case the region does not have an exiting block we will later (during
1452 // code generation) split the exit block. This will move potential PHI nodes
1453 // from the current exit block into the new region exiting block. Hence, PHI
1454 // nodes that are at this point not part of the region will be.
1455 // To handle these PHI nodes later we will now model their operands as scalar
1456 // accesses. Note that we do not model anything in the exit block if we have
1457 // an exiting block in the region, as there will not be any splitting later.
1458 if (!R
.isTopLevelRegion() && !scop
->hasSingleExitEdge()) {
1459 for (Instruction
&Inst
: *R
.getExit()) {
1460 PHINode
*PHI
= dyn_cast
<PHINode
>(&Inst
);
1464 buildPHIAccesses(nullptr, PHI
, nullptr, true);
1468 // Create memory accesses for global reads since all arrays are now known.
1469 auto *AF
= SE
.getConstant(IntegerType::getInt64Ty(SE
.getContext()), 0);
1470 for (auto GlobalReadPair
: GlobalReads
) {
1471 ScopStmt
*GlobalReadStmt
= GlobalReadPair
.first
;
1472 Instruction
*GlobalRead
= GlobalReadPair
.second
;
1473 for (auto *BP
: ArrayBasePointers
)
1474 addArrayAccess(GlobalReadStmt
, MemAccInst(GlobalRead
), MemoryAccess::READ
,
1475 BP
, BP
->getType(), false, {AF
}, {nullptr}, GlobalRead
);
1478 scop
->buildInvariantEquivalenceClasses();
1480 /// A map from basic blocks to their invalid domains.
1481 DenseMap
<BasicBlock
*, isl::set
> InvalidDomainMap
;
1483 if (!scop
->buildDomains(&R
, DT
, LI
, InvalidDomainMap
)) {
1484 DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1488 scop
->addUserAssumptions(AC
, DT
, LI
, InvalidDomainMap
);
1490 // Initialize the invalid domain.
1491 for (ScopStmt
&Stmt
: scop
->Stmts
)
1492 if (Stmt
.isBlockStmt())
1493 Stmt
.setInvalidDomain(InvalidDomainMap
[Stmt
.getEntryBlock()]);
1495 Stmt
.setInvalidDomain(InvalidDomainMap
[getRegionNodeBasicBlock(
1496 Stmt
.getRegion()->getNode())]);
1498 // Remove empty statements.
1499 // Exit early in case there are no executable statements left in this scop.
1500 scop
->removeStmtNotInDomainMap();
1501 scop
->simplifySCoP(false);
1502 if (scop
->isEmpty()) {
1503 DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1507 // The ScopStmts now have enough information to initialize themselves.
1508 for (ScopStmt
&Stmt
: *scop
) {
1510 collectSurroundingLoops(Stmt
);
1511 buildAccessRelations(Stmt
);
1513 if (DetectReductions
)
1514 checkForReductions(Stmt
);
1517 // Check early for a feasible runtime context.
1518 if (!scop
->hasFeasibleRuntimeContext()) {
1519 DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1523 // Check early for profitability. Afterwards it cannot change anymore,
1524 // only the runtime context could become infeasible.
1525 if (!scop
->isProfitable(UnprofitableScalarAccs
)) {
1526 scop
->invalidate(PROFITABLE
, DebugLoc());
1527 DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1531 scop
->buildSchedule(LI
);
1533 scop
->finalizeAccesses();
1535 scop
->realignParams();
1536 scop
->addUserContext();
1538 // After the context was fully constructed, thus all our knowledge about
1539 // the parameters is in there, we add all recorded assumptions to the
1540 // assumed/invalid context.
1541 scop
->addRecordedAssumptions();
1543 scop
->simplifyContexts();
1544 if (!scop
->buildAliasChecks(AA
)) {
1545 DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1549 scop
->hoistInvariantLoads();
1550 scop
->canonicalizeDynamicBasePtrs();
1551 scop
->verifyInvariantLoads();
1552 scop
->simplifySCoP(true);
1554 // Check late for a feasible runtime context because profitability did not
1556 if (!scop
->hasFeasibleRuntimeContext()) {
1557 DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1562 verifyUses(scop
.get(), LI
, DT
);
1566 ScopBuilder::ScopBuilder(Region
*R
, AssumptionCache
&AC
, AliasAnalysis
&AA
,
1567 const DataLayout
&DL
, DominatorTree
&DT
, LoopInfo
&LI
,
1568 ScopDetection
&SD
, ScalarEvolution
&SE
,
1569 OptimizationRemarkEmitter
&ORE
)
1570 : AA(AA
), DL(DL
), DT(DT
), LI(LI
), SD(SD
), SE(SE
) {
1572 auto P
= getBBPairForRegion(R
);
1573 getDebugLocations(P
, Beg
, End
);
1575 std::string Msg
= "SCoP begins here.";
1576 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEntry", Beg
, P
.first
)
1579 buildScop(*R
, AC
, ORE
);
1581 DEBUG(dbgs() << *scop
);
1583 if (!scop
->hasFeasibleRuntimeContext()) {
1585 Msg
= "SCoP ends here but was dismissed.";
1586 DEBUG(dbgs() << "SCoP detected but dismissed\n");
1589 Msg
= "SCoP ends here.";
1591 if (scop
->getMaxLoopDepth() > 0)
1595 if (R
->isTopLevelRegion())
1596 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEnd", End
, P
.first
)
1599 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEnd", End
, P
.second
)