1 //===- ScopBuilder.cpp ----------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the SCoP
13 // detection derived from their LLVM-IR code.
15 //===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "polly/Support/VirtualInstruction.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/EquivalenceClasses.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/RegionIterator.h"
37 #include "llvm/Analysis/ScalarEvolution.h"
38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Use.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace polly;
69 #define DEBUG_TYPE "polly-scops"
71 STATISTIC(ScopFound
, "Number of valid Scops");
72 STATISTIC(RichScopFound
, "Number of Scops containing a loop");
73 STATISTIC(InfeasibleScops
,
74 "Number of SCoPs with statically infeasible context.");
76 bool polly::ModelReadOnlyScalars
;
78 static cl::opt
<bool, true> XModelReadOnlyScalars(
79 "polly-analyze-read-only-scalars",
80 cl::desc("Model read-only scalar values in the scop description"),
81 cl::location(ModelReadOnlyScalars
), cl::Hidden
, cl::ZeroOrMore
,
82 cl::init(true), cl::cat(PollyCategory
));
84 static cl::opt
<bool> UnprofitableScalarAccs(
85 "polly-unprofitable-scalar-accs",
86 cl::desc("Count statements with scalar accesses as not optimizable"),
87 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
89 static cl::opt
<bool> DetectFortranArrays(
90 "polly-detect-fortran-arrays",
91 cl::desc("Detect Fortran arrays and use this for code generation"),
92 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
94 static cl::opt
<bool> DetectReductions("polly-detect-reductions",
95 cl::desc("Detect and exploit reductions"),
96 cl::Hidden
, cl::ZeroOrMore
,
97 cl::init(true), cl::cat(PollyCategory
));
99 // Multiplicative reductions can be disabled separately as these kind of
100 // operations can overflow easily. Additive reductions and bit operations
101 // are in contrast pretty stable.
102 static cl::opt
<bool> DisableMultiplicativeReductions(
103 "polly-disable-multiplicative-reductions",
104 cl::desc("Disable multiplicative reductions"), cl::Hidden
, cl::ZeroOrMore
,
105 cl::init(false), cl::cat(PollyCategory
));
107 enum class GranularityChoice
{ BasicBlocks
, ScalarIndependence
, Stores
};
109 static cl::opt
<GranularityChoice
> StmtGranularity(
110 "polly-stmt-granularity",
112 "Algorithm to use for splitting basic blocks into multiple statements"),
113 cl::values(clEnumValN(GranularityChoice::BasicBlocks
, "bb",
114 "One statement per basic block"),
115 clEnumValN(GranularityChoice::ScalarIndependence
, "scalar-indep",
116 "Scalar independence heuristic"),
117 clEnumValN(GranularityChoice::Stores
, "store",
118 "Store-level granularity")),
119 cl::init(GranularityChoice::BasicBlocks
), cl::cat(PollyCategory
));
121 void ScopBuilder::buildPHIAccesses(ScopStmt
*PHIStmt
, PHINode
*PHI
,
122 Region
*NonAffineSubRegion
,
124 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
125 // true, are not modeled as ordinary PHI nodes as they are not part of the
126 // region. However, we model the operands in the predecessor blocks that are
127 // part of the region as regular scalar accesses.
129 // If we can synthesize a PHI we can skip it, however only if it is in
130 // the region. If it is not it can only be in the exit block of the region.
131 // In this case we model the operands but not the PHI itself.
132 auto *Scope
= LI
.getLoopFor(PHI
->getParent());
133 if (!IsExitBlock
&& canSynthesize(PHI
, *scop
, &SE
, Scope
))
136 // PHI nodes are modeled as if they had been demoted prior to the SCoP
137 // detection. Hence, the PHI is a load of a new memory location in which the
138 // incoming value was written at the end of the incoming basic block.
139 bool OnlyNonAffineSubRegionOperands
= true;
140 for (unsigned u
= 0; u
< PHI
->getNumIncomingValues(); u
++) {
141 Value
*Op
= PHI
->getIncomingValue(u
);
142 BasicBlock
*OpBB
= PHI
->getIncomingBlock(u
);
143 ScopStmt
*OpStmt
= scop
->getIncomingStmtFor(PHI
->getOperandUse(u
));
145 // Do not build PHI dependences inside a non-affine subregion, but make
146 // sure that the necessary scalar values are still made available.
147 if (NonAffineSubRegion
&& NonAffineSubRegion
->contains(OpBB
)) {
148 auto *OpInst
= dyn_cast
<Instruction
>(Op
);
149 if (!OpInst
|| !NonAffineSubRegion
->contains(OpInst
))
150 ensureValueRead(Op
, OpStmt
);
154 OnlyNonAffineSubRegionOperands
= false;
155 ensurePHIWrite(PHI
, OpStmt
, OpBB
, Op
, IsExitBlock
);
158 if (!OnlyNonAffineSubRegionOperands
&& !IsExitBlock
) {
159 addPHIReadAccess(PHIStmt
, PHI
);
163 void ScopBuilder::buildScalarDependences(ScopStmt
*UserStmt
,
165 assert(!isa
<PHINode
>(Inst
));
167 // Pull-in required operands.
168 for (Use
&Op
: Inst
->operands())
169 ensureValueRead(Op
.get(), UserStmt
);
172 void ScopBuilder::buildEscapingDependences(Instruction
*Inst
) {
173 // Check for uses of this instruction outside the scop. Because we do not
174 // iterate over such instructions and therefore did not "ensure" the existence
175 // of a write, we must determine such use here.
176 if (scop
->isEscaping(Inst
))
177 ensureValueWrite(Inst
);
180 /// Check that a value is a Fortran Array descriptor.
182 /// We check if V has the following structure:
183 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
184 /// [<num> x %struct.descriptor_dimension] }
187 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
189 /// 1. V's type name starts with "struct.array"
190 /// 2. V's type has layout as shown.
191 /// 3. Final member of V's type has name "struct.descriptor_dimension",
192 /// 4. "struct.descriptor_dimension" has layout as shown.
193 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
195 /// We are interested in such types since this is the code that dragonegg
196 /// generates for Fortran array descriptors.
198 /// @param V the Value to be checked.
200 /// @returns True if V is a Fortran array descriptor, False otherwise.
201 bool isFortranArrayDescriptor(Value
*V
) {
202 PointerType
*PTy
= dyn_cast
<PointerType
>(V
->getType());
207 Type
*Ty
= PTy
->getElementType();
208 assert(Ty
&& "Ty expected to be initialized");
209 auto *StructArrTy
= dyn_cast
<StructType
>(Ty
);
211 if (!(StructArrTy
&& StructArrTy
->hasName()))
214 if (!StructArrTy
->getName().startswith("struct.array"))
217 if (StructArrTy
->getNumElements() != 4)
220 const ArrayRef
<Type
*> ArrMemberTys
= StructArrTy
->elements();
223 if (ArrMemberTys
[0] != Type::getInt8PtrTy(V
->getContext()))
226 // Get a reference to the int type and check that all the members
227 // share the same int type
228 Type
*IntTy
= ArrMemberTys
[1];
229 if (ArrMemberTys
[2] != IntTy
)
232 // type: [<num> x %struct.descriptor_dimension]
233 ArrayType
*DescriptorDimArrayTy
= dyn_cast
<ArrayType
>(ArrMemberTys
[3]);
234 if (!DescriptorDimArrayTy
)
237 // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
238 StructType
*DescriptorDimTy
=
239 dyn_cast
<StructType
>(DescriptorDimArrayTy
->getElementType());
241 if (!(DescriptorDimTy
&& DescriptorDimTy
->hasName()))
244 if (DescriptorDimTy
->getName() != "struct.descriptor_dimension")
247 if (DescriptorDimTy
->getNumElements() != 3)
250 for (auto MemberTy
: DescriptorDimTy
->elements()) {
251 if (MemberTy
!= IntTy
)
258 Value
*ScopBuilder::findFADAllocationVisible(MemAccInst Inst
) {
259 // match: 4.1 & 4.2 store/load
260 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
264 if (Inst
.getAlignment() != 8)
267 Value
*Address
= Inst
.getPointerOperand();
269 const BitCastInst
*Bitcast
= nullptr;
271 if (auto *Slot
= dyn_cast
<GetElementPtrInst
>(Address
)) {
272 Value
*TypedMem
= Slot
->getPointerOperand();
274 Bitcast
= dyn_cast
<BitCastInst
>(TypedMem
);
277 Bitcast
= dyn_cast
<BitCastInst
>(Address
);
283 auto *MallocMem
= Bitcast
->getOperand(0);
286 auto *MallocCall
= dyn_cast
<CallInst
>(MallocMem
);
290 Function
*MallocFn
= MallocCall
->getCalledFunction();
291 if (!(MallocFn
&& MallocFn
->hasName() && MallocFn
->getName() == "malloc"))
294 // Find all uses the malloc'd memory.
295 // We are looking for a "store" into a struct with the type being the Fortran
297 for (auto user
: MallocMem
->users()) {
299 auto *MallocStore
= dyn_cast
<StoreInst
>(user
);
303 auto *DescriptorGEP
=
304 dyn_cast
<GEPOperator
>(MallocStore
->getPointerOperand());
309 auto DescriptorType
=
310 dyn_cast
<StructType
>(DescriptorGEP
->getSourceElementType());
311 if (!(DescriptorType
&& DescriptorType
->hasName()))
314 Value
*Descriptor
= dyn_cast
<Value
>(DescriptorGEP
->getPointerOperand());
319 if (!isFortranArrayDescriptor(Descriptor
))
328 Value
*ScopBuilder::findFADAllocationInvisible(MemAccInst Inst
) {
330 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
333 Value
*Slot
= Inst
.getPointerOperand();
335 LoadInst
*MemLoad
= nullptr;
337 if (auto *SlotGEP
= dyn_cast
<GetElementPtrInst
>(Slot
)) {
339 MemLoad
= dyn_cast
<LoadInst
>(SlotGEP
->getPointerOperand());
342 MemLoad
= dyn_cast
<LoadInst
>(Slot
);
348 auto *BitcastOperator
=
349 dyn_cast
<BitCastOperator
>(MemLoad
->getPointerOperand());
350 if (!BitcastOperator
)
353 Value
*Descriptor
= dyn_cast
<Value
>(BitcastOperator
->getOperand(0));
357 if (!isFortranArrayDescriptor(Descriptor
))
363 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst
, ScopStmt
*Stmt
) {
364 Value
*Val
= Inst
.getValueOperand();
365 Type
*ElementType
= Val
->getType();
366 Value
*Address
= Inst
.getPointerOperand();
367 const SCEV
*AccessFunction
=
368 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
369 const SCEVUnknown
*BasePointer
=
370 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
371 enum MemoryAccess::AccessType AccType
=
372 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
374 if (auto *BitCast
= dyn_cast
<BitCastInst
>(Address
)) {
375 auto *Src
= BitCast
->getOperand(0);
376 auto *SrcTy
= Src
->getType();
377 auto *DstTy
= BitCast
->getType();
378 // Do not try to delinearize non-sized (opaque) pointers.
379 if ((SrcTy
->isPointerTy() && !SrcTy
->getPointerElementType()->isSized()) ||
380 (DstTy
->isPointerTy() && !DstTy
->getPointerElementType()->isSized())) {
383 if (SrcTy
->isPointerTy() && DstTy
->isPointerTy() &&
384 DL
.getTypeAllocSize(SrcTy
->getPointerElementType()) ==
385 DL
.getTypeAllocSize(DstTy
->getPointerElementType()))
389 auto *GEP
= dyn_cast
<GetElementPtrInst
>(Address
);
393 std::vector
<const SCEV
*> Subscripts
;
394 std::vector
<int> Sizes
;
395 std::tie(Subscripts
, Sizes
) = getIndexExpressionsFromGEP(GEP
, SE
);
396 auto *BasePtr
= GEP
->getOperand(0);
398 if (auto *BasePtrCast
= dyn_cast
<BitCastInst
>(BasePtr
))
399 BasePtr
= BasePtrCast
->getOperand(0);
401 // Check for identical base pointers to ensure that we do not miss index
402 // offsets that have been added before this GEP is applied.
403 if (BasePtr
!= BasePointer
->getValue())
406 std::vector
<const SCEV
*> SizesSCEV
;
408 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
410 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
411 for (auto *Subscript
: Subscripts
) {
412 InvariantLoadsSetTy AccessILS
;
413 if (!isAffineExpr(&scop
->getRegion(), SurroundingLoop
, Subscript
, SE
,
417 for (LoadInst
*LInst
: AccessILS
)
418 if (!ScopRIL
.count(LInst
))
425 SizesSCEV
.push_back(nullptr);
428 SizesSCEV
.push_back(SE
.getSCEV(
429 ConstantInt::get(IntegerType::getInt64Ty(BasePtr
->getContext()), V
)));
431 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
432 true, Subscripts
, SizesSCEV
, Val
);
436 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst
, ScopStmt
*Stmt
) {
437 if (!PollyDelinearize
)
440 Value
*Address
= Inst
.getPointerOperand();
441 Value
*Val
= Inst
.getValueOperand();
442 Type
*ElementType
= Val
->getType();
443 unsigned ElementSize
= DL
.getTypeAllocSize(ElementType
);
444 enum MemoryAccess::AccessType AccType
=
445 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
447 const SCEV
*AccessFunction
=
448 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
449 const SCEVUnknown
*BasePointer
=
450 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
452 assert(BasePointer
&& "Could not find base pointer");
454 auto &InsnToMemAcc
= scop
->getInsnToMemAccMap();
455 auto AccItr
= InsnToMemAcc
.find(Inst
);
456 if (AccItr
== InsnToMemAcc
.end())
459 std::vector
<const SCEV
*> Sizes
= {nullptr};
461 Sizes
.insert(Sizes
.end(), AccItr
->second
.Shape
->DelinearizedSizes
.begin(),
462 AccItr
->second
.Shape
->DelinearizedSizes
.end());
464 // In case only the element size is contained in the 'Sizes' array, the
465 // access does not access a real multi-dimensional array. Hence, we allow
466 // the normal single-dimensional access construction to handle this.
467 if (Sizes
.size() == 1)
470 // Remove the element size. This information is already provided by the
471 // ElementSize parameter. In case the element size of this access and the
472 // element size used for delinearization differs the delinearization is
473 // incorrect. Hence, we invalidate the scop.
475 // TODO: Handle delinearization with differing element sizes.
476 auto DelinearizedSize
=
477 cast
<SCEVConstant
>(Sizes
.back())->getAPInt().getSExtValue();
479 if (ElementSize
!= DelinearizedSize
)
480 scop
->invalidate(DELINEARIZATION
, Inst
->getDebugLoc(), Inst
->getParent());
482 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
483 true, AccItr
->second
.DelinearizedSubscripts
, Sizes
, Val
);
487 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst
, ScopStmt
*Stmt
) {
488 auto *MemIntr
= dyn_cast_or_null
<MemIntrinsic
>(Inst
);
490 if (MemIntr
== nullptr)
493 auto *L
= LI
.getLoopFor(Inst
->getParent());
494 auto *LengthVal
= SE
.getSCEVAtScope(MemIntr
->getLength(), L
);
497 // Check if the length val is actually affine or if we overapproximate it
498 InvariantLoadsSetTy AccessILS
;
499 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
501 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
502 bool LengthIsAffine
= isAffineExpr(&scop
->getRegion(), SurroundingLoop
,
503 LengthVal
, SE
, &AccessILS
);
504 for (LoadInst
*LInst
: AccessILS
)
505 if (!ScopRIL
.count(LInst
))
506 LengthIsAffine
= false;
510 auto *DestPtrVal
= MemIntr
->getDest();
513 auto *DestAccFunc
= SE
.getSCEVAtScope(DestPtrVal
, L
);
515 // Ignore accesses to "NULL".
516 // TODO: We could use this to optimize the region further, e.g., intersect
518 // isl_set_complement(isl_set_params(getDomain()))
519 // as we know it would be undefined to execute this instruction anyway.
520 if (DestAccFunc
->isZero())
523 auto *DestPtrSCEV
= dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(DestAccFunc
));
525 DestAccFunc
= SE
.getMinusSCEV(DestAccFunc
, DestPtrSCEV
);
526 addArrayAccess(Stmt
, Inst
, MemoryAccess::MUST_WRITE
, DestPtrSCEV
->getValue(),
527 IntegerType::getInt8Ty(DestPtrVal
->getContext()),
528 LengthIsAffine
, {DestAccFunc
, LengthVal
}, {nullptr},
529 Inst
.getValueOperand());
531 auto *MemTrans
= dyn_cast
<MemTransferInst
>(MemIntr
);
535 auto *SrcPtrVal
= MemTrans
->getSource();
538 auto *SrcAccFunc
= SE
.getSCEVAtScope(SrcPtrVal
, L
);
540 // Ignore accesses to "NULL".
541 // TODO: See above TODO
542 if (SrcAccFunc
->isZero())
545 auto *SrcPtrSCEV
= dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(SrcAccFunc
));
547 SrcAccFunc
= SE
.getMinusSCEV(SrcAccFunc
, SrcPtrSCEV
);
548 addArrayAccess(Stmt
, Inst
, MemoryAccess::READ
, SrcPtrSCEV
->getValue(),
549 IntegerType::getInt8Ty(SrcPtrVal
->getContext()),
550 LengthIsAffine
, {SrcAccFunc
, LengthVal
}, {nullptr},
551 Inst
.getValueOperand());
556 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst
, ScopStmt
*Stmt
) {
557 auto *CI
= dyn_cast_or_null
<CallInst
>(Inst
);
562 if (CI
->doesNotAccessMemory() || isIgnoredIntrinsic(CI
))
565 bool ReadOnly
= false;
566 auto *AF
= SE
.getConstant(IntegerType::getInt64Ty(CI
->getContext()), 0);
567 auto *CalledFunction
= CI
->getCalledFunction();
568 switch (AA
.getModRefBehavior(CalledFunction
)) {
569 case FMRB_UnknownModRefBehavior
:
570 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
571 case FMRB_DoesNotAccessMemory
:
573 case FMRB_DoesNotReadMemory
:
574 case FMRB_OnlyAccessesInaccessibleMem
:
575 case FMRB_OnlyAccessesInaccessibleOrArgMem
:
577 case FMRB_OnlyReadsMemory
:
578 GlobalReads
.emplace_back(Stmt
, CI
);
580 case FMRB_OnlyReadsArgumentPointees
:
583 case FMRB_OnlyAccessesArgumentPointees
: {
584 auto AccType
= ReadOnly
? MemoryAccess::READ
: MemoryAccess::MAY_WRITE
;
585 Loop
*L
= LI
.getLoopFor(Inst
->getParent());
586 for (const auto &Arg
: CI
->arg_operands()) {
587 if (!Arg
->getType()->isPointerTy())
590 auto *ArgSCEV
= SE
.getSCEVAtScope(Arg
, L
);
591 if (ArgSCEV
->isZero())
594 auto *ArgBasePtr
= cast
<SCEVUnknown
>(SE
.getPointerBase(ArgSCEV
));
595 addArrayAccess(Stmt
, Inst
, AccType
, ArgBasePtr
->getValue(),
596 ArgBasePtr
->getType(), false, {AF
}, {nullptr}, CI
);
605 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst
, ScopStmt
*Stmt
) {
606 Value
*Address
= Inst
.getPointerOperand();
607 Value
*Val
= Inst
.getValueOperand();
608 Type
*ElementType
= Val
->getType();
609 enum MemoryAccess::AccessType AccType
=
610 isa
<LoadInst
>(Inst
) ? MemoryAccess::READ
: MemoryAccess::MUST_WRITE
;
612 const SCEV
*AccessFunction
=
613 SE
.getSCEVAtScope(Address
, LI
.getLoopFor(Inst
->getParent()));
614 const SCEVUnknown
*BasePointer
=
615 dyn_cast
<SCEVUnknown
>(SE
.getPointerBase(AccessFunction
));
617 assert(BasePointer
&& "Could not find base pointer");
618 AccessFunction
= SE
.getMinusSCEV(AccessFunction
, BasePointer
);
620 // Check if the access depends on a loop contained in a non-affine subregion.
621 bool isVariantInNonAffineLoop
= false;
622 SetVector
<const Loop
*> Loops
;
623 findLoops(AccessFunction
, Loops
);
624 for (const Loop
*L
: Loops
)
625 if (Stmt
->contains(L
)) {
626 isVariantInNonAffineLoop
= true;
630 InvariantLoadsSetTy AccessILS
;
632 Loop
*SurroundingLoop
= Stmt
->getSurroundingLoop();
633 bool IsAffine
= !isVariantInNonAffineLoop
&&
634 isAffineExpr(&scop
->getRegion(), SurroundingLoop
,
635 AccessFunction
, SE
, &AccessILS
);
637 const InvariantLoadsSetTy
&ScopRIL
= scop
->getRequiredInvariantLoads();
638 for (LoadInst
*LInst
: AccessILS
)
639 if (!ScopRIL
.count(LInst
))
642 if (!IsAffine
&& AccType
== MemoryAccess::MUST_WRITE
)
643 AccType
= MemoryAccess::MAY_WRITE
;
645 addArrayAccess(Stmt
, Inst
, AccType
, BasePointer
->getValue(), ElementType
,
646 IsAffine
, {AccessFunction
}, {nullptr}, Val
);
649 void ScopBuilder::buildMemoryAccess(MemAccInst Inst
, ScopStmt
*Stmt
) {
650 if (buildAccessMemIntrinsic(Inst
, Stmt
))
653 if (buildAccessCallInst(Inst
, Stmt
))
656 if (buildAccessMultiDimFixed(Inst
, Stmt
))
659 if (buildAccessMultiDimParam(Inst
, Stmt
))
662 buildAccessSingleDim(Inst
, Stmt
);
665 void ScopBuilder::buildAccessFunctions() {
666 for (auto &Stmt
: *scop
) {
667 if (Stmt
.isBlockStmt()) {
668 buildAccessFunctions(&Stmt
, *Stmt
.getBasicBlock());
672 Region
*R
= Stmt
.getRegion();
673 for (BasicBlock
*BB
: R
->blocks())
674 buildAccessFunctions(&Stmt
, *BB
, R
);
677 // Build write accesses for values that are used after the SCoP.
678 // The instructions defining them might be synthesizable and therefore not
679 // contained in any statement, hence we iterate over the original instructions
680 // to identify all escaping values.
681 for (BasicBlock
*BB
: scop
->getRegion().blocks()) {
682 for (Instruction
&Inst
: *BB
)
683 buildEscapingDependences(&Inst
);
687 bool ScopBuilder::shouldModelInst(Instruction
*Inst
, Loop
*L
) {
688 return !isa
<TerminatorInst
>(Inst
) && !isIgnoredIntrinsic(Inst
) &&
689 !canSynthesize(Inst
, *scop
, &SE
, L
);
692 /// Generate a name for a statement.
694 /// @param BB The basic block the statement will represent.
695 /// @param BBIdx The index of the @p BB relative to other BBs/regions.
696 /// @param Count The index of the created statement in @p BB.
697 /// @param IsMain Whether this is the main of all statement for @p BB. If true,
698 /// no suffix will be added.
699 /// @param IsLast Uses a special indicator for the last statement of a BB.
700 static std::string
makeStmtName(BasicBlock
*BB
, long BBIdx
, int Count
,
701 bool IsMain
, bool IsLast
= false) {
704 if (UseInstructionNames
)
709 Suffix
+= 'a' + Count
;
711 Suffix
+= std::to_string(Count
);
713 return getIslCompatibleName("Stmt", BB
, BBIdx
, Suffix
, UseInstructionNames
);
716 /// Generate a name for a statement that represents a non-affine subregion.
718 /// @param R The region the statement will represent.
719 /// @param RIdx The index of the @p R relative to other BBs/regions.
720 static std::string
makeStmtName(Region
*R
, long RIdx
) {
721 return getIslCompatibleName("Stmt", R
->getNameStr(), RIdx
, "",
722 UseInstructionNames
);
725 void ScopBuilder::buildSequentialBlockStmts(BasicBlock
*BB
, bool SplitOnStore
) {
726 Loop
*SurroundingLoop
= LI
.getLoopFor(BB
);
729 long BBIdx
= scop
->getNextStmtIdx();
730 std::vector
<Instruction
*> Instructions
;
731 for (Instruction
&Inst
: *BB
) {
732 if (shouldModelInst(&Inst
, SurroundingLoop
))
733 Instructions
.push_back(&Inst
);
734 if (Inst
.getMetadata("polly_split_after") ||
735 (SplitOnStore
&& isa
<StoreInst
>(Inst
))) {
736 std::string Name
= makeStmtName(BB
, BBIdx
, Count
, Count
== 0);
737 scop
->addScopStmt(BB
, Name
, SurroundingLoop
, Instructions
);
739 Instructions
.clear();
743 std::string Name
= makeStmtName(BB
, BBIdx
, Count
, Count
== 0);
744 scop
->addScopStmt(BB
, Name
, SurroundingLoop
, Instructions
);
747 /// Is @p Inst an ordered instruction?
749 /// An unordered instruction is an instruction, such that a sequence of
750 /// unordered instructions can be permuted without changing semantics. Any
751 /// instruction for which this is not always the case is ordered.
752 static bool isOrderedInstruction(Instruction
*Inst
) {
753 return Inst
->mayHaveSideEffects() || Inst
->mayReadOrWriteMemory();
756 /// Join instructions to the same statement if one uses the scalar result of the
758 static void joinOperandTree(EquivalenceClasses
<Instruction
*> &UnionFind
,
759 ArrayRef
<Instruction
*> ModeledInsts
) {
760 for (Instruction
*Inst
: ModeledInsts
) {
761 if (isa
<PHINode
>(Inst
))
764 for (Use
&Op
: Inst
->operands()) {
765 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
.get());
769 // Check if OpInst is in the BB and is a modeled instruction.
770 auto OpVal
= UnionFind
.findValue(OpInst
);
771 if (OpVal
== UnionFind
.end())
774 UnionFind
.unionSets(Inst
, OpInst
);
779 /// Ensure that the order of ordered instructions does not change.
781 /// If we encounter an ordered instruction enclosed in instructions belonging to
782 /// a different statement (which might as well contain ordered instructions, but
783 /// this is not tested here), join them.
785 joinOrderedInstructions(EquivalenceClasses
<Instruction
*> &UnionFind
,
786 ArrayRef
<Instruction
*> ModeledInsts
) {
787 SetVector
<Instruction
*> SeenLeaders
;
788 for (Instruction
*Inst
: ModeledInsts
) {
789 if (!isOrderedInstruction(Inst
))
792 Instruction
*Leader
= UnionFind
.getLeaderValue(Inst
);
793 bool Inserted
= SeenLeaders
.insert(Leader
);
797 // Merge statements to close holes. Say, we have already seen statements A
798 // and B, in this order. Then we see an instruction of A again and we would
799 // see the pattern "A B A". This function joins all statements until the
800 // only seen occurrence of A.
801 for (Instruction
*Prev
: reverse(SeenLeaders
)) {
802 // Items added to 'SeenLeaders' are leaders, but may have lost their
803 // leadership status when merged into another statement.
804 Instruction
*PrevLeader
= UnionFind
.getLeaderValue(SeenLeaders
.back());
805 if (PrevLeader
== Leader
)
807 UnionFind
.unionSets(Prev
, Leader
);
812 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock
*BB
) {
813 Loop
*L
= LI
.getLoopFor(BB
);
815 // Extracting out modeled instructions saves us from checking
816 // shouldModelInst() repeatedly.
817 SmallVector
<Instruction
*, 32> ModeledInsts
;
818 EquivalenceClasses
<Instruction
*> UnionFind
;
819 Instruction
*MainInst
= nullptr;
820 for (Instruction
&Inst
: *BB
) {
821 if (!shouldModelInst(&Inst
, L
))
823 ModeledInsts
.push_back(&Inst
);
824 UnionFind
.insert(&Inst
);
826 // When a BB is split into multiple statements, the main statement is the
827 // one containing the 'main' instruction. We select the first instruction
828 // that is unlikely to be removed (because it has side-effects) as the main
829 // one. It is used to ensure that at least one statement from the bb has the
830 // same name as with -polly-stmt-granularity=bb.
831 if (!MainInst
&& (isa
<StoreInst
>(Inst
) ||
832 (isa
<CallInst
>(Inst
) && !isa
<IntrinsicInst
>(Inst
))))
836 joinOperandTree(UnionFind
, ModeledInsts
);
837 joinOrderedInstructions(UnionFind
, ModeledInsts
);
839 // The list of instructions for statement (statement represented by the leader
840 // instruction). The order of statements instructions is reversed such that
841 // the epilogue is first. This makes it easier to ensure that the epilogue is
842 // the last statement.
843 MapVector
<Instruction
*, std::vector
<Instruction
*>> LeaderToInstList
;
845 // Collect the instructions of all leaders. UnionFind's member iterator
846 // unfortunately are not in any specific order.
847 for (Instruction
&Inst
: reverse(*BB
)) {
848 auto LeaderIt
= UnionFind
.findLeader(&Inst
);
849 if (LeaderIt
== UnionFind
.member_end())
852 std::vector
<Instruction
*> &InstList
= LeaderToInstList
[*LeaderIt
];
853 InstList
.push_back(&Inst
);
856 // Finally build the statements.
858 long BBIdx
= scop
->getNextStmtIdx();
859 for (auto &Instructions
: reverse(LeaderToInstList
)) {
860 std::vector
<Instruction
*> &InstList
= Instructions
.second
;
862 // If there is no main instruction, make the first statement the main.
865 IsMain
= std::find(InstList
.begin(), InstList
.end(), MainInst
) !=
868 IsMain
= (Count
== 0);
870 std::reverse(InstList
.begin(), InstList
.end());
871 std::string Name
= makeStmtName(BB
, BBIdx
, Count
, IsMain
);
872 scop
->addScopStmt(BB
, Name
, L
, std::move(InstList
));
876 // Unconditionally add an epilogue (last statement). It contains no
877 // instructions, but holds the PHI write accesses for successor basic blocks,
878 // if the incoming value is not defined in another statement if the same BB.
879 // The epilogue will be removed if no PHIWrite is added to it.
880 std::string EpilogueName
= makeStmtName(BB
, BBIdx
, Count
, false, true);
881 scop
->addScopStmt(BB
, EpilogueName
, L
, {});
884 void ScopBuilder::buildStmts(Region
&SR
) {
885 if (scop
->isNonAffineSubRegion(&SR
)) {
886 std::vector
<Instruction
*> Instructions
;
887 Loop
*SurroundingLoop
=
888 getFirstNonBoxedLoopFor(SR
.getEntry(), LI
, scop
->getBoxedLoops());
889 for (Instruction
&Inst
: *SR
.getEntry())
890 if (shouldModelInst(&Inst
, SurroundingLoop
))
891 Instructions
.push_back(&Inst
);
892 long RIdx
= scop
->getNextStmtIdx();
893 std::string Name
= makeStmtName(&SR
, RIdx
);
894 scop
->addScopStmt(&SR
, Name
, SurroundingLoop
, Instructions
);
898 for (auto I
= SR
.element_begin(), E
= SR
.element_end(); I
!= E
; ++I
)
899 if (I
->isSubRegion())
900 buildStmts(*I
->getNodeAs
<Region
>());
902 BasicBlock
*BB
= I
->getNodeAs
<BasicBlock
>();
903 switch (StmtGranularity
) {
904 case GranularityChoice::BasicBlocks
:
905 buildSequentialBlockStmts(BB
);
907 case GranularityChoice::ScalarIndependence
:
908 buildEqivClassBlockStmts(BB
);
910 case GranularityChoice::Stores
:
911 buildSequentialBlockStmts(BB
, true);
917 void ScopBuilder::buildAccessFunctions(ScopStmt
*Stmt
, BasicBlock
&BB
,
918 Region
*NonAffineSubRegion
) {
921 "The exit BB is the only one that cannot be represented by a statement");
922 assert(Stmt
->represents(&BB
));
924 // We do not build access functions for error blocks, as they may contain
925 // instructions we can not model.
926 if (isErrorBlock(BB
, scop
->getRegion(), LI
, DT
))
929 auto BuildAccessesForInst
= [this, Stmt
,
930 NonAffineSubRegion
](Instruction
*Inst
) {
931 PHINode
*PHI
= dyn_cast
<PHINode
>(Inst
);
933 buildPHIAccesses(Stmt
, PHI
, NonAffineSubRegion
, false);
935 if (auto MemInst
= MemAccInst::dyn_cast(*Inst
)) {
936 assert(Stmt
&& "Cannot build access function in non-existing statement");
937 buildMemoryAccess(MemInst
, Stmt
);
940 // PHI nodes have already been modeled above and TerminatorInsts that are
941 // not part of a non-affine subregion are fully modeled and regenerated
942 // from the polyhedral domains. Hence, they do not need to be modeled as
943 // explicit data dependences.
945 buildScalarDependences(Stmt
, Inst
);
948 const InvariantLoadsSetTy
&RIL
= scop
->getRequiredInvariantLoads();
949 bool IsEntryBlock
= (Stmt
->getEntryBlock() == &BB
);
951 for (Instruction
*Inst
: Stmt
->getInstructions())
952 BuildAccessesForInst(Inst
);
953 if (Stmt
->isRegionStmt())
954 BuildAccessesForInst(BB
.getTerminator());
956 for (Instruction
&Inst
: BB
) {
957 if (isIgnoredIntrinsic(&Inst
))
960 // Invariant loads already have been processed.
961 if (isa
<LoadInst
>(Inst
) && RIL
.count(cast
<LoadInst
>(&Inst
)))
964 BuildAccessesForInst(&Inst
);
969 MemoryAccess
*ScopBuilder::addMemoryAccess(
970 ScopStmt
*Stmt
, Instruction
*Inst
, MemoryAccess::AccessType AccType
,
971 Value
*BaseAddress
, Type
*ElementType
, bool Affine
, Value
*AccessValue
,
972 ArrayRef
<const SCEV
*> Subscripts
, ArrayRef
<const SCEV
*> Sizes
,
974 bool isKnownMustAccess
= false;
976 // Accesses in single-basic block statements are always executed.
977 if (Stmt
->isBlockStmt())
978 isKnownMustAccess
= true;
980 if (Stmt
->isRegionStmt()) {
981 // Accesses that dominate the exit block of a non-affine region are always
982 // executed. In non-affine regions there may exist MemoryKind::Values that
983 // do not dominate the exit. MemoryKind::Values will always dominate the
984 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
985 // non-affine region.
986 if (Inst
&& DT
.dominates(Inst
->getParent(), Stmt
->getRegion()->getExit()))
987 isKnownMustAccess
= true;
990 // Non-affine PHI writes do not "happen" at a particular instruction, but
991 // after exiting the statement. Therefore they are guaranteed to execute and
992 // overwrite the old value.
993 if (Kind
== MemoryKind::PHI
|| Kind
== MemoryKind::ExitPHI
)
994 isKnownMustAccess
= true;
996 if (!isKnownMustAccess
&& AccType
== MemoryAccess::MUST_WRITE
)
997 AccType
= MemoryAccess::MAY_WRITE
;
999 auto *Access
= new MemoryAccess(Stmt
, Inst
, AccType
, BaseAddress
, ElementType
,
1000 Affine
, Subscripts
, Sizes
, AccessValue
, Kind
);
1002 scop
->addAccessFunction(Access
);
1003 Stmt
->addAccess(Access
);
1007 void ScopBuilder::addArrayAccess(ScopStmt
*Stmt
, MemAccInst MemAccInst
,
1008 MemoryAccess::AccessType AccType
,
1009 Value
*BaseAddress
, Type
*ElementType
,
1011 ArrayRef
<const SCEV
*> Subscripts
,
1012 ArrayRef
<const SCEV
*> Sizes
,
1013 Value
*AccessValue
) {
1014 ArrayBasePointers
.insert(BaseAddress
);
1015 auto *MemAccess
= addMemoryAccess(Stmt
, MemAccInst
, AccType
, BaseAddress
,
1016 ElementType
, IsAffine
, AccessValue
,
1017 Subscripts
, Sizes
, MemoryKind::Array
);
1019 if (!DetectFortranArrays
)
1022 if (Value
*FAD
= findFADAllocationInvisible(MemAccInst
))
1023 MemAccess
->setFortranArrayDescriptor(FAD
);
1024 else if (Value
*FAD
= findFADAllocationVisible(MemAccInst
))
1025 MemAccess
->setFortranArrayDescriptor(FAD
);
1028 void ScopBuilder::ensureValueWrite(Instruction
*Inst
) {
1029 // Find the statement that defines the value of Inst. That statement has to
1030 // write the value to make it available to those statements that read it.
1031 ScopStmt
*Stmt
= scop
->getStmtFor(Inst
);
1033 // It is possible that the value is synthesizable within a loop (such that it
1034 // is not part of any statement), but not after the loop (where you need the
1035 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1036 // avoid this. In case the IR has no such PHI, use the last statement (where
1037 // the value is synthesizable) to write the value.
1039 Stmt
= scop
->getLastStmtFor(Inst
->getParent());
1041 // Inst not defined within this SCoP.
1045 // Do not process further if the instruction is already written.
1046 if (Stmt
->lookupValueWriteOf(Inst
))
1049 addMemoryAccess(Stmt
, Inst
, MemoryAccess::MUST_WRITE
, Inst
, Inst
->getType(),
1050 true, Inst
, ArrayRef
<const SCEV
*>(),
1051 ArrayRef
<const SCEV
*>(), MemoryKind::Value
);
1054 void ScopBuilder::ensureValueRead(Value
*V
, ScopStmt
*UserStmt
) {
1055 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1056 // to be able to replace this one. Currently, there is a split responsibility.
1057 // In a first step, the MemoryAccess is created, but without the
1058 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1059 // AccessRelation is created. At least for scalar accesses, there is no new
1060 // information available at ScopStmt::buildAccessRelations(), so we could
1061 // create the AccessRelation right away. This is what
1062 // ScopStmt::ensureValueRead(Value*) does.
1064 auto *Scope
= UserStmt
->getSurroundingLoop();
1065 auto VUse
= VirtualUse::create(scop
.get(), UserStmt
, Scope
, V
, false);
1066 switch (VUse
.getKind()) {
1067 case VirtualUse::Constant
:
1068 case VirtualUse::Block
:
1069 case VirtualUse::Synthesizable
:
1070 case VirtualUse::Hoisted
:
1071 case VirtualUse::Intra
:
1072 // Uses of these kinds do not need a MemoryAccess.
1075 case VirtualUse::ReadOnly
:
1076 // Add MemoryAccess for invariant values only if requested.
1077 if (!ModelReadOnlyScalars
)
1081 case VirtualUse::Inter
:
1083 // Do not create another MemoryAccess for reloading the value if one already
1085 if (UserStmt
->lookupValueReadOf(V
))
1088 addMemoryAccess(UserStmt
, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1089 true, V
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1092 // Inter-statement uses need to write the value in their defining statement.
1094 ensureValueWrite(cast
<Instruction
>(V
));
1099 void ScopBuilder::ensurePHIWrite(PHINode
*PHI
, ScopStmt
*IncomingStmt
,
1100 BasicBlock
*IncomingBlock
,
1101 Value
*IncomingValue
, bool IsExitBlock
) {
1102 // As the incoming block might turn out to be an error statement ensure we
1103 // will create an exit PHI SAI object. It is needed during code generation
1104 // and would be created later anyway.
1106 scop
->getOrCreateScopArrayInfo(PHI
, PHI
->getType(), {},
1107 MemoryKind::ExitPHI
);
1109 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1110 // from outside the SCoP's region have no statement representation.
1114 // Take care for the incoming value being available in the incoming block.
1115 // This must be done before the check for multiple PHI writes because multiple
1116 // exiting edges from subregion each can be the effective written value of the
1117 // subregion. As such, all of them must be made available in the subregion
1119 ensureValueRead(IncomingValue
, IncomingStmt
);
1121 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1122 if (MemoryAccess
*Acc
= IncomingStmt
->lookupPHIWriteOf(PHI
)) {
1123 assert(Acc
->getAccessInstruction() == PHI
);
1124 Acc
->addIncoming(IncomingBlock
, IncomingValue
);
1128 MemoryAccess
*Acc
= addMemoryAccess(
1129 IncomingStmt
, PHI
, MemoryAccess::MUST_WRITE
, PHI
, PHI
->getType(), true,
1130 PHI
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1131 IsExitBlock
? MemoryKind::ExitPHI
: MemoryKind::PHI
);
1133 Acc
->addIncoming(IncomingBlock
, IncomingValue
);
1136 void ScopBuilder::addPHIReadAccess(ScopStmt
*PHIStmt
, PHINode
*PHI
) {
1137 addMemoryAccess(PHIStmt
, PHI
, MemoryAccess::READ
, PHI
, PHI
->getType(), true,
1138 PHI
, ArrayRef
<const SCEV
*>(), ArrayRef
<const SCEV
*>(),
1142 void ScopBuilder::buildDomain(ScopStmt
&Stmt
) {
1143 isl::id Id
= isl::id::alloc(scop
->getIslCtx(), Stmt
.getBaseName(), &Stmt
);
1145 Stmt
.Domain
= scop
->getDomainConditions(&Stmt
);
1146 Stmt
.Domain
= Stmt
.Domain
.set_tuple_id(Id
);
1149 void ScopBuilder::collectSurroundingLoops(ScopStmt
&Stmt
) {
1150 isl::set Domain
= Stmt
.getDomain();
1151 for (unsigned u
= 0, e
= Domain
.dim(isl::dim::set
); u
< e
; u
++) {
1152 isl::id DimId
= Domain
.get_dim_id(isl::dim::set
, u
);
1153 Stmt
.NestLoops
.push_back(static_cast<Loop
*>(DimId
.get_user()));
1157 /// Return the reduction type for a given binary operator.
1158 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
1159 const Instruction
*Load
) {
1161 return MemoryAccess::RT_NONE
;
1162 switch (BinOp
->getOpcode()) {
1163 case Instruction::FAdd
:
1164 if (!BinOp
->isFast())
1165 return MemoryAccess::RT_NONE
;
1167 case Instruction::Add
:
1168 return MemoryAccess::RT_ADD
;
1169 case Instruction::Or
:
1170 return MemoryAccess::RT_BOR
;
1171 case Instruction::Xor
:
1172 return MemoryAccess::RT_BXOR
;
1173 case Instruction::And
:
1174 return MemoryAccess::RT_BAND
;
1175 case Instruction::FMul
:
1176 if (!BinOp
->isFast())
1177 return MemoryAccess::RT_NONE
;
1179 case Instruction::Mul
:
1180 if (DisableMultiplicativeReductions
)
1181 return MemoryAccess::RT_NONE
;
1182 return MemoryAccess::RT_MUL
;
1184 return MemoryAccess::RT_NONE
;
1188 void ScopBuilder::checkForReductions(ScopStmt
&Stmt
) {
1189 SmallVector
<MemoryAccess
*, 2> Loads
;
1190 SmallVector
<std::pair
<MemoryAccess
*, MemoryAccess
*>, 4> Candidates
;
1192 // First collect candidate load-store reduction chains by iterating over all
1193 // stores and collecting possible reduction loads.
1194 for (MemoryAccess
*StoreMA
: Stmt
) {
1195 if (StoreMA
->isRead())
1199 collectCandidateReductionLoads(StoreMA
, Loads
);
1200 for (MemoryAccess
*LoadMA
: Loads
)
1201 Candidates
.push_back(std::make_pair(LoadMA
, StoreMA
));
1204 // Then check each possible candidate pair.
1205 for (const auto &CandidatePair
: Candidates
) {
1207 isl::map LoadAccs
= CandidatePair
.first
->getAccessRelation();
1208 isl::map StoreAccs
= CandidatePair
.second
->getAccessRelation();
1210 // Skip those with obviously unequal base addresses.
1211 if (!LoadAccs
.has_equal_space(StoreAccs
)) {
1215 // And check if the remaining for overlap with other memory accesses.
1216 isl::map AllAccsRel
= LoadAccs
.unite(StoreAccs
);
1217 AllAccsRel
= AllAccsRel
.intersect_domain(Stmt
.getDomain());
1218 isl::set AllAccs
= AllAccsRel
.range();
1220 for (MemoryAccess
*MA
: Stmt
) {
1221 if (MA
== CandidatePair
.first
|| MA
== CandidatePair
.second
)
1225 MA
->getAccessRelation().intersect_domain(Stmt
.getDomain());
1226 isl::set Accs
= AccRel
.range();
1228 if (AllAccs
.has_equal_space(Accs
)) {
1229 isl::set OverlapAccs
= Accs
.intersect(AllAccs
);
1230 Valid
= Valid
&& OverlapAccs
.is_empty();
1237 const LoadInst
*Load
=
1238 dyn_cast
<const LoadInst
>(CandidatePair
.first
->getAccessInstruction());
1239 MemoryAccess::ReductionType RT
=
1240 getReductionType(dyn_cast
<BinaryOperator
>(Load
->user_back()), Load
);
1242 // If no overlapping access was found we mark the load and store as
1244 CandidatePair
.first
->markAsReductionLike(RT
);
1245 CandidatePair
.second
->markAsReductionLike(RT
);
1249 void ScopBuilder::collectCandidateReductionLoads(
1250 MemoryAccess
*StoreMA
, SmallVectorImpl
<MemoryAccess
*> &Loads
) {
1251 ScopStmt
*Stmt
= StoreMA
->getStatement();
1253 auto *Store
= dyn_cast
<StoreInst
>(StoreMA
->getAccessInstruction());
1257 // Skip if there is not one binary operator between the load and the store
1258 auto *BinOp
= dyn_cast
<BinaryOperator
>(Store
->getValueOperand());
1262 // Skip if the binary operators has multiple uses
1263 if (BinOp
->getNumUses() != 1)
1266 // Skip if the opcode of the binary operator is not commutative/associative
1267 if (!BinOp
->isCommutative() || !BinOp
->isAssociative())
1270 // Skip if the binary operator is outside the current SCoP
1271 if (BinOp
->getParent() != Store
->getParent())
1274 // Skip if it is a multiplicative reduction and we disabled them
1275 if (DisableMultiplicativeReductions
&&
1276 (BinOp
->getOpcode() == Instruction::Mul
||
1277 BinOp
->getOpcode() == Instruction::FMul
))
1280 // Check the binary operator operands for a candidate load
1281 auto *PossibleLoad0
= dyn_cast
<LoadInst
>(BinOp
->getOperand(0));
1282 auto *PossibleLoad1
= dyn_cast
<LoadInst
>(BinOp
->getOperand(1));
1283 if (!PossibleLoad0
&& !PossibleLoad1
)
1286 // A load is only a candidate if it cannot escape (thus has only this use)
1287 if (PossibleLoad0
&& PossibleLoad0
->getNumUses() == 1)
1288 if (PossibleLoad0
->getParent() == Store
->getParent())
1289 Loads
.push_back(&Stmt
->getArrayAccessFor(PossibleLoad0
));
1290 if (PossibleLoad1
&& PossibleLoad1
->getNumUses() == 1)
1291 if (PossibleLoad1
->getParent() == Store
->getParent())
1292 Loads
.push_back(&Stmt
->getArrayAccessFor(PossibleLoad1
));
1295 void ScopBuilder::buildAccessRelations(ScopStmt
&Stmt
) {
1296 for (MemoryAccess
*Access
: Stmt
.MemAccs
) {
1297 Type
*ElementType
= Access
->getElementType();
1300 if (Access
->isPHIKind())
1301 Ty
= MemoryKind::PHI
;
1302 else if (Access
->isExitPHIKind())
1303 Ty
= MemoryKind::ExitPHI
;
1304 else if (Access
->isValueKind())
1305 Ty
= MemoryKind::Value
;
1307 Ty
= MemoryKind::Array
;
1309 auto *SAI
= scop
->getOrCreateScopArrayInfo(Access
->getOriginalBaseAddr(),
1310 ElementType
, Access
->Sizes
, Ty
);
1311 Access
->buildAccessRelation(SAI
);
1312 scop
->addAccessData(Access
);
1317 static void verifyUse(Scop
*S
, Use
&Op
, LoopInfo
&LI
) {
1318 auto PhysUse
= VirtualUse::create(S
, Op
, &LI
, false);
1319 auto VirtUse
= VirtualUse::create(S
, Op
, &LI
, true);
1320 assert(PhysUse
.getKind() == VirtUse
.getKind());
1323 /// Check the consistency of every statement's MemoryAccesses.
1325 /// The check is carried out by expecting the "physical" kind of use (derived
1326 /// from the BasicBlocks instructions resides in) to be same as the "virtual"
1327 /// kind of use (derived from a statement's MemoryAccess).
1329 /// The "physical" uses are taken by ensureValueRead to determine whether to
1330 /// create MemoryAccesses. When done, the kind of scalar access should be the
1331 /// same no matter which way it was derived.
1333 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1334 /// can intentionally influence on the kind of uses (not corresponding to the
1335 /// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1336 /// to pick up the virtual uses. But here in the code generator, this has not
1337 /// happened yet, such that virtual and physical uses are equivalent.
1338 static void verifyUses(Scop
*S
, LoopInfo
&LI
, DominatorTree
&DT
) {
1339 for (auto *BB
: S
->getRegion().blocks()) {
1340 for (auto &Inst
: *BB
) {
1341 auto *Stmt
= S
->getStmtFor(&Inst
);
1345 if (isIgnoredIntrinsic(&Inst
))
1348 // Branch conditions are encoded in the statement domains.
1349 if (isa
<TerminatorInst
>(&Inst
) && Stmt
->isBlockStmt())
1353 for (auto &Op
: Inst
.operands())
1354 verifyUse(S
, Op
, LI
);
1356 // Stores do not produce values used by other statements.
1357 if (isa
<StoreInst
>(Inst
))
1360 // For every value defined in the block, also check that a use of that
1361 // value in the same statement would not be an inter-statement use. It can
1362 // still be synthesizable or load-hoisted, but these kind of instructions
1363 // are not directly copied in code-generation.
1365 VirtualUse::create(S
, Stmt
, Stmt
->getSurroundingLoop(), &Inst
, true);
1366 assert(VirtDef
.getKind() == VirtualUse::Synthesizable
||
1367 VirtDef
.getKind() == VirtualUse::Intra
||
1368 VirtDef
.getKind() == VirtualUse::Hoisted
);
1372 if (S
->hasSingleExitEdge())
1375 // PHINodes in the SCoP region's exit block are also uses to be checked.
1376 if (!S
->getRegion().isTopLevelRegion()) {
1377 for (auto &Inst
: *S
->getRegion().getExit()) {
1378 if (!isa
<PHINode
>(Inst
))
1381 for (auto &Op
: Inst
.operands())
1382 verifyUse(S
, Op
, LI
);
1388 /// Return the block that is the representing block for @p RN.
1389 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
1390 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
1391 : RN
->getNodeAs
<BasicBlock
>();
1394 void ScopBuilder::buildScop(Region
&R
, AssumptionCache
&AC
,
1395 OptimizationRemarkEmitter
&ORE
) {
1396 scop
.reset(new Scop(R
, SE
, LI
, DT
, *SD
.getDetectionContext(&R
), ORE
));
1400 // Create all invariant load instructions first. These are categorized as
1401 // 'synthesizable', therefore are not part of any ScopStmt but need to be
1402 // created somewhere.
1403 const InvariantLoadsSetTy
&RIL
= scop
->getRequiredInvariantLoads();
1404 for (BasicBlock
*BB
: scop
->getRegion().blocks()) {
1405 if (isErrorBlock(*BB
, scop
->getRegion(), LI
, DT
))
1408 for (Instruction
&Inst
: *BB
) {
1409 LoadInst
*Load
= dyn_cast
<LoadInst
>(&Inst
);
1413 if (!RIL
.count(Load
))
1416 // Invariant loads require a MemoryAccess to be created in some statement.
1417 // It is not important to which statement the MemoryAccess is added
1418 // because it will later be removed from the ScopStmt again. We chose the
1419 // first statement of the basic block the LoadInst is in.
1420 ArrayRef
<ScopStmt
*> List
= scop
->getStmtListFor(BB
);
1421 assert(!List
.empty());
1422 ScopStmt
*RILStmt
= List
.front();
1423 buildMemoryAccess(Load
, RILStmt
);
1426 buildAccessFunctions();
1428 // In case the region does not have an exiting block we will later (during
1429 // code generation) split the exit block. This will move potential PHI nodes
1430 // from the current exit block into the new region exiting block. Hence, PHI
1431 // nodes that are at this point not part of the region will be.
1432 // To handle these PHI nodes later we will now model their operands as scalar
1433 // accesses. Note that we do not model anything in the exit block if we have
1434 // an exiting block in the region, as there will not be any splitting later.
1435 if (!R
.isTopLevelRegion() && !scop
->hasSingleExitEdge()) {
1436 for (Instruction
&Inst
: *R
.getExit()) {
1437 PHINode
*PHI
= dyn_cast
<PHINode
>(&Inst
);
1441 buildPHIAccesses(nullptr, PHI
, nullptr, true);
1445 // Create memory accesses for global reads since all arrays are now known.
1446 auto *AF
= SE
.getConstant(IntegerType::getInt64Ty(SE
.getContext()), 0);
1447 for (auto GlobalReadPair
: GlobalReads
) {
1448 ScopStmt
*GlobalReadStmt
= GlobalReadPair
.first
;
1449 Instruction
*GlobalRead
= GlobalReadPair
.second
;
1450 for (auto *BP
: ArrayBasePointers
)
1451 addArrayAccess(GlobalReadStmt
, MemAccInst(GlobalRead
), MemoryAccess::READ
,
1452 BP
, BP
->getType(), false, {AF
}, {nullptr}, GlobalRead
);
1455 scop
->buildInvariantEquivalenceClasses();
1457 /// A map from basic blocks to their invalid domains.
1458 DenseMap
<BasicBlock
*, isl::set
> InvalidDomainMap
;
1460 if (!scop
->buildDomains(&R
, DT
, LI
, InvalidDomainMap
)) {
1461 DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1465 scop
->addUserAssumptions(AC
, DT
, LI
, InvalidDomainMap
);
1467 // Initialize the invalid domain.
1468 for (ScopStmt
&Stmt
: scop
->Stmts
)
1469 if (Stmt
.isBlockStmt())
1470 Stmt
.setInvalidDomain(InvalidDomainMap
[Stmt
.getEntryBlock()]);
1472 Stmt
.setInvalidDomain(InvalidDomainMap
[getRegionNodeBasicBlock(
1473 Stmt
.getRegion()->getNode())]);
1475 // Remove empty statements.
1476 // Exit early in case there are no executable statements left in this scop.
1477 scop
->removeStmtNotInDomainMap();
1478 scop
->simplifySCoP(false);
1479 if (scop
->isEmpty()) {
1480 DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1484 // The ScopStmts now have enough information to initialize themselves.
1485 for (ScopStmt
&Stmt
: *scop
) {
1487 collectSurroundingLoops(Stmt
);
1488 buildAccessRelations(Stmt
);
1490 if (DetectReductions
)
1491 checkForReductions(Stmt
);
1494 // Check early for a feasible runtime context.
1495 if (!scop
->hasFeasibleRuntimeContext()) {
1496 DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1500 // Check early for profitability. Afterwards it cannot change anymore,
1501 // only the runtime context could become infeasible.
1502 if (!scop
->isProfitable(UnprofitableScalarAccs
)) {
1503 scop
->invalidate(PROFITABLE
, DebugLoc());
1504 DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1508 scop
->buildSchedule(LI
);
1510 scop
->finalizeAccesses();
1512 scop
->realignParams();
1513 scop
->addUserContext();
1515 // After the context was fully constructed, thus all our knowledge about
1516 // the parameters is in there, we add all recorded assumptions to the
1517 // assumed/invalid context.
1518 scop
->addRecordedAssumptions();
1520 scop
->simplifyContexts();
1521 if (!scop
->buildAliasChecks(AA
)) {
1522 DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1526 scop
->hoistInvariantLoads();
1527 scop
->canonicalizeDynamicBasePtrs();
1528 scop
->verifyInvariantLoads();
1529 scop
->simplifySCoP(true);
1531 // Check late for a feasible runtime context because profitability did not
1533 if (!scop
->hasFeasibleRuntimeContext()) {
1534 DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1539 verifyUses(scop
.get(), LI
, DT
);
1543 ScopBuilder::ScopBuilder(Region
*R
, AssumptionCache
&AC
, AliasAnalysis
&AA
,
1544 const DataLayout
&DL
, DominatorTree
&DT
, LoopInfo
&LI
,
1545 ScopDetection
&SD
, ScalarEvolution
&SE
,
1546 OptimizationRemarkEmitter
&ORE
)
1547 : AA(AA
), DL(DL
), DT(DT
), LI(LI
), SD(SD
), SE(SE
) {
1549 auto P
= getBBPairForRegion(R
);
1550 getDebugLocations(P
, Beg
, End
);
1552 std::string Msg
= "SCoP begins here.";
1553 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEntry", Beg
, P
.first
)
1556 buildScop(*R
, AC
, ORE
);
1558 DEBUG(dbgs() << *scop
);
1560 if (!scop
->hasFeasibleRuntimeContext()) {
1562 Msg
= "SCoP ends here but was dismissed.";
1563 DEBUG(dbgs() << "SCoP detected but dismissed\n");
1566 Msg
= "SCoP ends here.";
1568 if (scop
->getMaxLoopDepth() > 0)
1572 if (R
->isTopLevelRegion())
1573 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEnd", End
, P
.first
)
1576 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "ScopEnd", End
, P
.second
)