//===- ScopBuilder.cpp ---------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

static cl::opt<bool> ModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

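// All three options above are registered in PollyCategory and can be toggled
// on the command line, e.g. 'opt -polly-detect-fortran-arrays ...'; being
// cl::Hidden, they are only listed under --help-hidden.
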
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock
  // is true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

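// Illustration of the modeling done by buildPHIAccesses above (hypothetical
// IR, not from this file): for
//   join:
//     %phi = phi double [ %a, %then ], [ %b, %else ]
// the builder emits PHI writes of %a and %b in the statements of %then and
// %else (ensurePHIWrite) and a PHI read in the statement of %join
// (addPHIReadAccess), as if %phi had been demoted to memory beforehand.
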
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such uses here.
  for (Use &U : Inst->uses()) {
    Instruction *UI = dyn_cast<Instruction>(U.getUser());
    if (!UI)
      continue;

    BasicBlock *UseParent = getUseBlock(U);
    BasicBlock *UserParent = UI->getParent();

    // An escaping value is either used by an instruction not within the scop,
    // or (when the scop region's exit needs to be simplified) by a PHI in the
    // scop's exit block. This is because region simplification before code
    // generation inserts new basic blocks before the PHI such that its
    // incoming blocks are not in the scop anymore.
    if (!scop->contains(UseParent) ||
        (isa<PHINode>(UI) && scop->isExit(UserParent) &&
         scop->hasSingleExitEdge())) {
      // At least one escaping use found.
      ensureValueWrite(Inst);
      break;
    }
  }
}

/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                        [<num> x %struct.descriptor_dimension] }
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}

Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;

  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory. We are looking for a "store" into a
  // struct with the type being the Fortran descriptor type.
  for (auto user : MallocMem->users()) {
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}

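// Sketch of the "visible" allocation pattern matched above, in hypothetical
// IR (the exact field index of the base-address member is not checked here):
//   %mem = call i8* @malloc(i64 %size)
//   %base = getelementptr %"struct.array1_real(kind=8)",
//                         %"struct.array1_real(kind=8)"* %desc, i32 0, i32 0
//   store i8* %mem, i8** %base
//   %typed = bitcast i8* %mem to double*
//   store double %v, double* %typed, align 8
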
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;

  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}

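// The "invisible" pattern above reaches the descriptor from the opposite
// direction: the accessed pointer is itself loaded from the descriptor's
// base-address field (hypothetical IR):
//   %slot = bitcast %"struct.array1_real(kind=8)"* %desc to double**
//   %base = load double*, double** %slot
//   store double %v, double* %base
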
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);

  return true;
}

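// Illustration (hypothetical): for 'float A[128][64]' accessed as 'A[i][j]',
// getIndexExpressionsFromGEP yields Subscripts = {i, j} and Sizes = {64}; the
// leading nullptr marks the outermost dimension as unsized, so the access is
// modeled as MemRef_A[i][j] on an array of shape [*][64].
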
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}

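// Illustration (hypothetical): for 'double A[n][m]' accessed as 'A[i][j]',
// the address SCEV is A + (i*m + j)*8; ScopDetection's delinearization gives
// DelinearizedSubscripts = {i, j} and DelinearizedSizes = {m, 8}, and the
// trailing element size 8 is checked against ElementSize and popped above.
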
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //       isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}

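// In effect, memcpy(Dst, Src, n) is modeled as a MUST_WRITE of n bytes of the
// destination array plus a READ of n bytes of the source array, both as i8
// arrays; if n is not affine, LengthVal is dropped and the accessed extent is
// overapproximated.
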
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
    LLVM_FALLTHROUGH;
  case FMRB_OnlyAccessesArgumentPointees:
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }

  return true;
}

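// For example, a readnone call is skipped entirely, a call that only reads
// memory is recorded in GlobalReads and later expanded to READs of all known
// arrays (see buildScop), and a call that only accesses its pointer arguments
// gets one zero-offset READ or MAY_WRITE per pointer argument.
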
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}

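// buildMemoryAccess below tries the specialized builders in order (memory
// intrinsics, call instructions, fixed-size multi-dimensional, parametric
// multi-dimensional) and falls back to the single-dimensional builder if none
// of them applies.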
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {

  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}

void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }
}

void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    scop->addScopStmt(&SR, SurroundingLoop);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      std::vector<Instruction *> Instructions;
      for (Instruction &Inst : *I->getNodeAs<BasicBlock>()) {
        Loop *L = LI.getLoopFor(Inst.getParent());
        if (!isa<TerminatorInst>(&Inst) && !isIgnoredIntrinsic(&Inst) &&
            !canSynthesize(&Inst, *scop, &SE, L))
          Instructions.push_back(&Inst);
      }
      Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
      scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
                        Instructions);
    }
}

void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion,
                                       bool IsExitBlock) {
  assert(
      !Stmt == IsExitBlock &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(IsExitBlock || Stmt->contains(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT) && !IsExitBlock)
    return;

  for (Instruction &Inst : BB) {
    PHINode *PHI = dyn_cast<PHINode>(&Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, IsExitBlock);

    // For the exit block we stop modeling after the last PHI node.
    if (!PHI && IsExitBlock)
      break;

    if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    if (isIgnoredIntrinsic(&Inst))
      continue;

    // PHI nodes have already been modeled above and TerminatorInsts that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI && (!isa<TerminatorInst>(&Inst) || NonAffineSubRegion))
      buildScalarDependences(Stmt, &Inst);

    buildEscapingDependences(&Inst);
  }
}

*ScopBuilder::addMemoryAccess(
696 ScopStmt
*Stmt
, Instruction
*Inst
, MemoryAccess::AccessType AccType
,
697 Value
*BaseAddress
, Type
*ElementType
, bool Affine
, Value
*AccessValue
,
698 ArrayRef
<const SCEV
*> Subscripts
, ArrayRef
<const SCEV
*> Sizes
,
700 bool isKnownMustAccess
= false;
702 // Accesses in single-basic block statements are always executed.
703 if (Stmt
->isBlockStmt())
704 isKnownMustAccess
= true;
706 if (Stmt
->isRegionStmt()) {
707 // Accesses that dominate the exit block of a non-affine region are always
708 // executed. In non-affine regions there may exist MemoryKind::Values that
709 // do not dominate the exit. MemoryKind::Values will always dominate the
710 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
711 // non-affine region.
712 if (Inst
&& DT
.dominates(Inst
->getParent(), Stmt
->getRegion()->getExit()))
713 isKnownMustAccess
= true;
716 // Non-affine PHI writes do not "happen" at a particular instruction, but
717 // after exiting the statement. Therefore they are guaranteed to execute and
718 // overwrite the old value.
719 if (Kind
== MemoryKind::PHI
|| Kind
== MemoryKind::ExitPHI
)
720 isKnownMustAccess
= true;
722 if (!isKnownMustAccess
&& AccType
== MemoryAccess::MUST_WRITE
)
723 AccType
= MemoryAccess::MAY_WRITE
;
725 auto *Access
= new MemoryAccess(Stmt
, Inst
, AccType
, BaseAddress
, ElementType
,
726 Affine
, Subscripts
, Sizes
, AccessValue
, Kind
);
728 scop
->addAccessFunction(Access
);
729 Stmt
->addAccess(Access
);
733 void ScopBuilder::addArrayAccess(ScopStmt
*Stmt
, MemAccInst MemAccInst
,
734 MemoryAccess::AccessType AccType
,
735 Value
*BaseAddress
, Type
*ElementType
,
737 ArrayRef
<const SCEV
*> Subscripts
,
738 ArrayRef
<const SCEV
*> Sizes
,
739 Value
*AccessValue
) {
740 ArrayBasePointers
.insert(BaseAddress
);
741 auto *MemAccess
= addMemoryAccess(Stmt
, MemAccInst
, AccType
, BaseAddress
,
742 ElementType
, IsAffine
, AccessValue
,
743 Subscripts
, Sizes
, MemoryKind::Array
);
745 if (!DetectFortranArrays
)
748 if (Value
*FAD
= findFADAllocationInvisible(MemAccInst
))
749 MemAccess
->setFortranArrayDescriptor(FAD
);
750 else if (Value
*FAD
= findFADAllocationVisible(MemAccInst
))
751 MemAccess
->setFortranArrayDescriptor(FAD
);
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}

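// ensureValueRead below is the counterpart of ensureValueWrite: a use of a
// value defined in another statement becomes a MemoryKind::Value READ here,
// paired with a MUST_WRITE in the defining statement, so the scalar flows
// through a virtual memory location.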
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one
    // already exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}

void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from subregion each can be the effective written value of the
  // subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}

void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}

static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (not corresponding to
/// the "physical" anymore, hence called "virtual"). The CodeGenerator
/// therefore has to pick up the virtual uses. But here, in the ScopBuilder,
/// this has not happened yet, such that virtual and physical uses are
/// equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kind of
      // instructions are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, *SD.getDetectionContext(&R), SD.ORE));

  buildStmts(R);
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge())
    buildAccessFunctions(nullptr, *R.getExit(), nullptr,
                         /* IsExitBlock */ true);

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead),
                     MemoryAccess::READ, BP, BP->getType(), false, {AF},
                     {nullptr}, GlobalRead);
  }

  scop->buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap))
    return;

  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()].copy());
    else
      Stmt.setInvalidDomain(
          InvalidDomainMap[getRegionNodeBasicBlock(Stmt.getRegion()->getNode())]
              .copy());

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty())
    return;

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop)
    Stmt.init(LI);

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext())
    return;

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    return;
  }

  scop->buildSchedule(LI);

  scop->finalizeAccesses();

  scop->realignParams();
  scop->addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  scop->addRecordedAssumptions();

  scop->simplifyContexts();
  if (!scop->buildAliasChecks(AA))
    return;

  scop->hoistInvariantLoads();
  scop->canonicalizeDynamicBasePtrs();
  scop->verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext())
    return;

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}

ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  SD.ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
              << Msg);

  buildScop(*R, AC);

  DEBUG(scop->print(dbgs()));

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    SD.ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
                << Msg);
  else
    SD.ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
                << Msg);
}