[IR] Replace `isa<TerminatorInst>` with `isTerminator()`.
[polly-mirror.git] / lib / Analysis / ScopBuilder.cpp
blobc1918f5b866d30bbab69885d73dbc80315eb526b
1 //===- ScopBuilder.cpp ----------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the SCoP
13 // detection derived from their LLVM-IR code.
15 //===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/SCEVValidator.h"
24 #include "polly/Support/ScopHelper.h"
25 #include "polly/Support/VirtualInstruction.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/EquivalenceClasses.h"
30 #include "llvm/ADT/SetVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/RegionInfo.h"
36 #include "llvm/Analysis/RegionIterator.h"
37 #include "llvm/Analysis/ScalarEvolution.h"
38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Use.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include <cassert>
62 #include <string>
63 #include <tuple>
64 #include <vector>
66 using namespace llvm;
67 using namespace polly;
69 #define DEBUG_TYPE "polly-scops"
71 STATISTIC(ScopFound, "Number of valid Scops");
72 STATISTIC(RichScopFound, "Number of Scops containing a loop");
73 STATISTIC(InfeasibleScops,
74 "Number of SCoPs with statically infeasible context.");
76 bool polly::ModelReadOnlyScalars;
78 static cl::opt<bool, true> XModelReadOnlyScalars(
79 "polly-analyze-read-only-scalars",
80 cl::desc("Model read-only scalar values in the scop description"),
81 cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
82 cl::init(true), cl::cat(PollyCategory));
84 static cl::opt<bool> UnprofitableScalarAccs(
85 "polly-unprofitable-scalar-accs",
86 cl::desc("Count statements with scalar accesses as not optimizable"),
87 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
89 static cl::opt<bool> DetectFortranArrays(
90 "polly-detect-fortran-arrays",
91 cl::desc("Detect Fortran arrays and use this for code generation"),
92 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
94 static cl::opt<bool> DetectReductions("polly-detect-reductions",
95 cl::desc("Detect and exploit reductions"),
96 cl::Hidden, cl::ZeroOrMore,
97 cl::init(true), cl::cat(PollyCategory));
99 // Multiplicative reductions can be disabled separately as these kind of
100 // operations can overflow easily. Additive reductions and bit operations
101 // are in contrast pretty stable.
102 static cl::opt<bool> DisableMultiplicativeReductions(
103 "polly-disable-multiplicative-reductions",
104 cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
105 cl::init(false), cl::cat(PollyCategory));
107 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
109 static cl::opt<GranularityChoice> StmtGranularity(
110 "polly-stmt-granularity",
111 cl::desc(
112 "Algorithm to use for splitting basic blocks into multiple statements"),
113 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
114 "One statement per basic block"),
115 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
116 "Scalar independence heuristic"),
117 clEnumValN(GranularityChoice::Stores, "store",
118 "Store-level granularity")),
119 cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
121 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
122 Region *NonAffineSubRegion,
123 bool IsExitBlock) {
124 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
125 // true, are not modeled as ordinary PHI nodes as they are not part of the
126 // region. However, we model the operands in the predecessor blocks that are
127 // part of the region as regular scalar accesses.
129 // If we can synthesize a PHI we can skip it, however only if it is in
130 // the region. If it is not it can only be in the exit block of the region.
131 // In this case we model the operands but not the PHI itself.
132 auto *Scope = LI.getLoopFor(PHI->getParent());
133 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
134 return;
136 // PHI nodes are modeled as if they had been demoted prior to the SCoP
137 // detection. Hence, the PHI is a load of a new memory location in which the
138 // incoming value was written at the end of the incoming basic block.
139 bool OnlyNonAffineSubRegionOperands = true;
140 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
141 Value *Op = PHI->getIncomingValue(u);
142 BasicBlock *OpBB = PHI->getIncomingBlock(u);
143 ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
145 // Do not build PHI dependences inside a non-affine subregion, but make
146 // sure that the necessary scalar values are still made available.
147 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
148 auto *OpInst = dyn_cast<Instruction>(Op);
149 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
150 ensureValueRead(Op, OpStmt);
151 continue;
154 OnlyNonAffineSubRegionOperands = false;
155 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
158 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
159 addPHIReadAccess(PHIStmt, PHI);
163 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
164 Instruction *Inst) {
165 assert(!isa<PHINode>(Inst));
167 // Pull-in required operands.
168 for (Use &Op : Inst->operands())
169 ensureValueRead(Op.get(), UserStmt);
172 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
173 // Check for uses of this instruction outside the scop. Because we do not
174 // iterate over such instructions and therefore did not "ensure" the existence
175 // of a write, we must determine such use here.
176 if (scop->isEscaping(Inst))
177 ensureValueWrite(Inst);
180 /// Check that a value is a Fortran Array descriptor.
182 /// We check if V has the following structure:
183 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
184 /// [<num> x %struct.descriptor_dimension] }
187 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
189 /// 1. V's type name starts with "struct.array"
190 /// 2. V's type has layout as shown.
191 /// 3. Final member of V's type has name "struct.descriptor_dimension",
192 /// 4. "struct.descriptor_dimension" has layout as shown.
193 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
195 /// We are interested in such types since this is the code that dragonegg
196 /// generates for Fortran array descriptors.
198 /// @param V the Value to be checked.
200 /// @returns True if V is a Fortran array descriptor, False otherwise.
201 bool isFortranArrayDescriptor(Value *V) {
202 PointerType *PTy = dyn_cast<PointerType>(V->getType());
204 if (!PTy)
205 return false;
207 Type *Ty = PTy->getElementType();
208 assert(Ty && "Ty expected to be initialized");
209 auto *StructArrTy = dyn_cast<StructType>(Ty);
211 if (!(StructArrTy && StructArrTy->hasName()))
212 return false;
214 if (!StructArrTy->getName().startswith("struct.array"))
215 return false;
217 if (StructArrTy->getNumElements() != 4)
218 return false;
220 const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
222 // i8* match
223 if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
224 return false;
226 // Get a reference to the int type and check that all the members
227 // share the same int type
228 Type *IntTy = ArrMemberTys[1];
229 if (ArrMemberTys[2] != IntTy)
230 return false;
232 // type: [<num> x %struct.descriptor_dimension]
233 ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
234 if (!DescriptorDimArrayTy)
235 return false;
237 // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
238 StructType *DescriptorDimTy =
239 dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
241 if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
242 return false;
244 if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
245 return false;
247 if (DescriptorDimTy->getNumElements() != 3)
248 return false;
250 for (auto MemberTy : DescriptorDimTy->elements()) {
251 if (MemberTy != IntTy)
252 return false;
255 return true;
258 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
259 // match: 4.1 & 4.2 store/load
260 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
261 return nullptr;
263 // match: 4
264 if (Inst.getAlignment() != 8)
265 return nullptr;
267 Value *Address = Inst.getPointerOperand();
269 const BitCastInst *Bitcast = nullptr;
270 // [match: 3]
271 if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
272 Value *TypedMem = Slot->getPointerOperand();
273 // match: 2
274 Bitcast = dyn_cast<BitCastInst>(TypedMem);
275 } else {
276 // match: 2
277 Bitcast = dyn_cast<BitCastInst>(Address);
280 if (!Bitcast)
281 return nullptr;
283 auto *MallocMem = Bitcast->getOperand(0);
285 // match: 1
286 auto *MallocCall = dyn_cast<CallInst>(MallocMem);
287 if (!MallocCall)
288 return nullptr;
290 Function *MallocFn = MallocCall->getCalledFunction();
291 if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
292 return nullptr;
294 // Find all uses the malloc'd memory.
295 // We are looking for a "store" into a struct with the type being the Fortran
296 // descriptor type
297 for (auto user : MallocMem->users()) {
298 /// match: 5
299 auto *MallocStore = dyn_cast<StoreInst>(user);
300 if (!MallocStore)
301 continue;
303 auto *DescriptorGEP =
304 dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
305 if (!DescriptorGEP)
306 continue;
308 // match: 5
309 auto DescriptorType =
310 dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
311 if (!(DescriptorType && DescriptorType->hasName()))
312 continue;
314 Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
316 if (!Descriptor)
317 continue;
319 if (!isFortranArrayDescriptor(Descriptor))
320 continue;
322 return Descriptor;
325 return nullptr;
328 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
329 // match: 3
330 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
331 return nullptr;
333 Value *Slot = Inst.getPointerOperand();
335 LoadInst *MemLoad = nullptr;
336 // [match: 2]
337 if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
338 // match: 1
339 MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
340 } else {
341 // match: 1
342 MemLoad = dyn_cast<LoadInst>(Slot);
345 if (!MemLoad)
346 return nullptr;
348 auto *BitcastOperator =
349 dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
350 if (!BitcastOperator)
351 return nullptr;
353 Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
354 if (!Descriptor)
355 return nullptr;
357 if (!isFortranArrayDescriptor(Descriptor))
358 return nullptr;
360 return Descriptor;
363 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
364 Value *Val = Inst.getValueOperand();
365 Type *ElementType = Val->getType();
366 Value *Address = Inst.getPointerOperand();
367 const SCEV *AccessFunction =
368 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
369 const SCEVUnknown *BasePointer =
370 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
371 enum MemoryAccess::AccessType AccType =
372 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
374 if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
375 auto *Src = BitCast->getOperand(0);
376 auto *SrcTy = Src->getType();
377 auto *DstTy = BitCast->getType();
378 // Do not try to delinearize non-sized (opaque) pointers.
379 if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
380 (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
381 return false;
383 if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
384 DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
385 DL.getTypeAllocSize(DstTy->getPointerElementType()))
386 Address = Src;
389 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
390 if (!GEP)
391 return false;
393 std::vector<const SCEV *> Subscripts;
394 std::vector<int> Sizes;
395 std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
396 auto *BasePtr = GEP->getOperand(0);
398 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
399 BasePtr = BasePtrCast->getOperand(0);
401 // Check for identical base pointers to ensure that we do not miss index
402 // offsets that have been added before this GEP is applied.
403 if (BasePtr != BasePointer->getValue())
404 return false;
406 std::vector<const SCEV *> SizesSCEV;
408 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
410 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
411 for (auto *Subscript : Subscripts) {
412 InvariantLoadsSetTy AccessILS;
413 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
414 &AccessILS))
415 return false;
417 for (LoadInst *LInst : AccessILS)
418 if (!ScopRIL.count(LInst))
419 return false;
422 if (Sizes.empty())
423 return false;
425 SizesSCEV.push_back(nullptr);
427 for (auto V : Sizes)
428 SizesSCEV.push_back(SE.getSCEV(
429 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
431 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
432 true, Subscripts, SizesSCEV, Val);
433 return true;
436 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
437 if (!PollyDelinearize)
438 return false;
440 Value *Address = Inst.getPointerOperand();
441 Value *Val = Inst.getValueOperand();
442 Type *ElementType = Val->getType();
443 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
444 enum MemoryAccess::AccessType AccType =
445 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
447 const SCEV *AccessFunction =
448 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
449 const SCEVUnknown *BasePointer =
450 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
452 assert(BasePointer && "Could not find base pointer");
454 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
455 auto AccItr = InsnToMemAcc.find(Inst);
456 if (AccItr == InsnToMemAcc.end())
457 return false;
459 std::vector<const SCEV *> Sizes = {nullptr};
461 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
462 AccItr->second.Shape->DelinearizedSizes.end());
464 // In case only the element size is contained in the 'Sizes' array, the
465 // access does not access a real multi-dimensional array. Hence, we allow
466 // the normal single-dimensional access construction to handle this.
467 if (Sizes.size() == 1)
468 return false;
470 // Remove the element size. This information is already provided by the
471 // ElementSize parameter. In case the element size of this access and the
472 // element size used for delinearization differs the delinearization is
473 // incorrect. Hence, we invalidate the scop.
475 // TODO: Handle delinearization with differing element sizes.
476 auto DelinearizedSize =
477 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
478 Sizes.pop_back();
479 if (ElementSize != DelinearizedSize)
480 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
482 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
483 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
484 return true;
487 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
488 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
490 if (MemIntr == nullptr)
491 return false;
493 auto *L = LI.getLoopFor(Inst->getParent());
494 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
495 assert(LengthVal);
497 // Check if the length val is actually affine or if we overapproximate it
498 InvariantLoadsSetTy AccessILS;
499 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
501 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
502 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
503 LengthVal, SE, &AccessILS);
504 for (LoadInst *LInst : AccessILS)
505 if (!ScopRIL.count(LInst))
506 LengthIsAffine = false;
507 if (!LengthIsAffine)
508 LengthVal = nullptr;
510 auto *DestPtrVal = MemIntr->getDest();
511 assert(DestPtrVal);
513 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
514 assert(DestAccFunc);
515 // Ignore accesses to "NULL".
516 // TODO: We could use this to optimize the region further, e.g., intersect
517 // the context with
518 // isl_set_complement(isl_set_params(getDomain()))
519 // as we know it would be undefined to execute this instruction anyway.
520 if (DestAccFunc->isZero())
521 return true;
523 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
524 assert(DestPtrSCEV);
525 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
526 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
527 IntegerType::getInt8Ty(DestPtrVal->getContext()),
528 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
529 Inst.getValueOperand());
531 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
532 if (!MemTrans)
533 return true;
535 auto *SrcPtrVal = MemTrans->getSource();
536 assert(SrcPtrVal);
538 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
539 assert(SrcAccFunc);
540 // Ignore accesses to "NULL".
541 // TODO: See above TODO
542 if (SrcAccFunc->isZero())
543 return true;
545 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
546 assert(SrcPtrSCEV);
547 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
548 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
549 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
550 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
551 Inst.getValueOperand());
553 return true;
556 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
557 auto *CI = dyn_cast_or_null<CallInst>(Inst);
559 if (CI == nullptr)
560 return false;
562 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
563 return true;
565 bool ReadOnly = false;
566 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
567 auto *CalledFunction = CI->getCalledFunction();
568 switch (AA.getModRefBehavior(CalledFunction)) {
569 case FMRB_UnknownModRefBehavior:
570 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
571 case FMRB_DoesNotAccessMemory:
572 return true;
573 case FMRB_DoesNotReadMemory:
574 case FMRB_OnlyAccessesInaccessibleMem:
575 case FMRB_OnlyAccessesInaccessibleOrArgMem:
576 return false;
577 case FMRB_OnlyReadsMemory:
578 GlobalReads.emplace_back(Stmt, CI);
579 return true;
580 case FMRB_OnlyReadsArgumentPointees:
581 ReadOnly = true;
582 // Fall through
583 case FMRB_OnlyAccessesArgumentPointees: {
584 auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
585 Loop *L = LI.getLoopFor(Inst->getParent());
586 for (const auto &Arg : CI->arg_operands()) {
587 if (!Arg->getType()->isPointerTy())
588 continue;
590 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
591 if (ArgSCEV->isZero())
592 continue;
594 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
595 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
596 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
598 return true;
602 return true;
605 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
606 Value *Address = Inst.getPointerOperand();
607 Value *Val = Inst.getValueOperand();
608 Type *ElementType = Val->getType();
609 enum MemoryAccess::AccessType AccType =
610 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
612 const SCEV *AccessFunction =
613 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
614 const SCEVUnknown *BasePointer =
615 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
617 assert(BasePointer && "Could not find base pointer");
618 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
620 // Check if the access depends on a loop contained in a non-affine subregion.
621 bool isVariantInNonAffineLoop = false;
622 SetVector<const Loop *> Loops;
623 findLoops(AccessFunction, Loops);
624 for (const Loop *L : Loops)
625 if (Stmt->contains(L)) {
626 isVariantInNonAffineLoop = true;
627 break;
630 InvariantLoadsSetTy AccessILS;
632 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
633 bool IsAffine = !isVariantInNonAffineLoop &&
634 isAffineExpr(&scop->getRegion(), SurroundingLoop,
635 AccessFunction, SE, &AccessILS);
637 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
638 for (LoadInst *LInst : AccessILS)
639 if (!ScopRIL.count(LInst))
640 IsAffine = false;
642 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
643 AccType = MemoryAccess::MAY_WRITE;
645 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
646 IsAffine, {AccessFunction}, {nullptr}, Val);
649 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
650 if (buildAccessMemIntrinsic(Inst, Stmt))
651 return;
653 if (buildAccessCallInst(Inst, Stmt))
654 return;
656 if (buildAccessMultiDimFixed(Inst, Stmt))
657 return;
659 if (buildAccessMultiDimParam(Inst, Stmt))
660 return;
662 buildAccessSingleDim(Inst, Stmt);
665 void ScopBuilder::buildAccessFunctions() {
666 for (auto &Stmt : *scop) {
667 if (Stmt.isBlockStmt()) {
668 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
669 continue;
672 Region *R = Stmt.getRegion();
673 for (BasicBlock *BB : R->blocks())
674 buildAccessFunctions(&Stmt, *BB, R);
677 // Build write accesses for values that are used after the SCoP.
678 // The instructions defining them might be synthesizable and therefore not
679 // contained in any statement, hence we iterate over the original instructions
680 // to identify all escaping values.
681 for (BasicBlock *BB : scop->getRegion().blocks()) {
682 for (Instruction &Inst : *BB)
683 buildEscapingDependences(&Inst);
687 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
688 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
689 !canSynthesize(Inst, *scop, &SE, L);
692 /// Generate a name for a statement.
694 /// @param BB The basic block the statement will represent.
695 /// @param BBIdx The index of the @p BB relative to other BBs/regions.
696 /// @param Count The index of the created statement in @p BB.
697 /// @param IsMain Whether this is the main of all statement for @p BB. If true,
698 /// no suffix will be added.
699 /// @param IsLast Uses a special indicator for the last statement of a BB.
700 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
701 bool IsMain, bool IsLast = false) {
702 std::string Suffix;
703 if (!IsMain) {
704 if (UseInstructionNames)
705 Suffix = '_';
706 if (IsLast)
707 Suffix += "last";
708 else if (Count < 26)
709 Suffix += 'a' + Count;
710 else
711 Suffix += std::to_string(Count);
713 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
716 /// Generate a name for a statement that represents a non-affine subregion.
718 /// @param R The region the statement will represent.
719 /// @param RIdx The index of the @p R relative to other BBs/regions.
720 static std::string makeStmtName(Region *R, long RIdx) {
721 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
722 UseInstructionNames);
725 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
726 Loop *SurroundingLoop = LI.getLoopFor(BB);
728 int Count = 0;
729 long BBIdx = scop->getNextStmtIdx();
730 std::vector<Instruction *> Instructions;
731 for (Instruction &Inst : *BB) {
732 if (shouldModelInst(&Inst, SurroundingLoop))
733 Instructions.push_back(&Inst);
734 if (Inst.getMetadata("polly_split_after") ||
735 (SplitOnStore && isa<StoreInst>(Inst))) {
736 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
737 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
738 Count++;
739 Instructions.clear();
743 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
744 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
747 /// Is @p Inst an ordered instruction?
749 /// An unordered instruction is an instruction, such that a sequence of
750 /// unordered instructions can be permuted without changing semantics. Any
751 /// instruction for which this is not always the case is ordered.
752 static bool isOrderedInstruction(Instruction *Inst) {
753 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
756 /// Join instructions to the same statement if one uses the scalar result of the
757 /// other.
758 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
759 ArrayRef<Instruction *> ModeledInsts) {
760 for (Instruction *Inst : ModeledInsts) {
761 if (isa<PHINode>(Inst))
762 continue;
764 for (Use &Op : Inst->operands()) {
765 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
766 if (!OpInst)
767 continue;
769 // Check if OpInst is in the BB and is a modeled instruction.
770 auto OpVal = UnionFind.findValue(OpInst);
771 if (OpVal == UnionFind.end())
772 continue;
774 UnionFind.unionSets(Inst, OpInst);
779 /// Ensure that the order of ordered instructions does not change.
781 /// If we encounter an ordered instruction enclosed in instructions belonging to
782 /// a different statement (which might as well contain ordered instructions, but
783 /// this is not tested here), join them.
784 static void
785 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
786 ArrayRef<Instruction *> ModeledInsts) {
787 SetVector<Instruction *> SeenLeaders;
788 for (Instruction *Inst : ModeledInsts) {
789 if (!isOrderedInstruction(Inst))
790 continue;
792 Instruction *Leader = UnionFind.getLeaderValue(Inst);
793 bool Inserted = SeenLeaders.insert(Leader);
794 if (Inserted)
795 continue;
797 // Merge statements to close holes. Say, we have already seen statements A
798 // and B, in this order. Then we see an instruction of A again and we would
799 // see the pattern "A B A". This function joins all statements until the
800 // only seen occurrence of A.
801 for (Instruction *Prev : reverse(SeenLeaders)) {
802 // Items added to 'SeenLeaders' are leaders, but may have lost their
803 // leadership status when merged into another statement.
804 Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
805 if (PrevLeader == Leader)
806 break;
807 UnionFind.unionSets(Prev, Leader);
812 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
813 /// the incoming values from this block are executed after the PHI READ.
815 /// Otherwise it could overwrite the incoming value from before the BB with the
816 /// value for the next execution. This can happen if the PHI WRITE is added to
817 /// the statement with the instruction that defines the incoming value (instead
818 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
819 /// are in order, we put both into the statement. PHI WRITEs are always executed
820 /// after PHI READs when they are in the same statement.
822 /// TODO: This is an overpessimization. We only have to ensure that the PHI
823 /// WRITE is not put into a statement containing the PHI itself. That could also
824 /// be done by
825 /// - having all (strongly connected) PHIs in a single statement,
826 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
827 /// has a chance of being lifted before a PHI by being in a statement with a
828 /// PHI that comes before in the basic block), or
829 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
830 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
831 ArrayRef<Instruction *> ModeledInsts) {
832 for (Instruction *Inst : ModeledInsts) {
833 PHINode *PHI = dyn_cast<PHINode>(Inst);
834 if (!PHI)
835 continue;
837 int Idx = PHI->getBasicBlockIndex(PHI->getParent());
838 if (Idx < 0)
839 continue;
841 Instruction *IncomingVal =
842 dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
843 if (!IncomingVal)
844 continue;
846 UnionFind.unionSets(PHI, IncomingVal);
850 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
851 Loop *L = LI.getLoopFor(BB);
853 // Extracting out modeled instructions saves us from checking
854 // shouldModelInst() repeatedly.
855 SmallVector<Instruction *, 32> ModeledInsts;
856 EquivalenceClasses<Instruction *> UnionFind;
857 Instruction *MainInst = nullptr;
858 for (Instruction &Inst : *BB) {
859 if (!shouldModelInst(&Inst, L))
860 continue;
861 ModeledInsts.push_back(&Inst);
862 UnionFind.insert(&Inst);
864 // When a BB is split into multiple statements, the main statement is the
865 // one containing the 'main' instruction. We select the first instruction
866 // that is unlikely to be removed (because it has side-effects) as the main
867 // one. It is used to ensure that at least one statement from the bb has the
868 // same name as with -polly-stmt-granularity=bb.
869 if (!MainInst && (isa<StoreInst>(Inst) ||
870 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
871 MainInst = &Inst;
874 joinOperandTree(UnionFind, ModeledInsts);
875 joinOrderedInstructions(UnionFind, ModeledInsts);
876 joinOrderedPHIs(UnionFind, ModeledInsts);
878 // The list of instructions for statement (statement represented by the leader
879 // instruction). The order of statements instructions is reversed such that
880 // the epilogue is first. This makes it easier to ensure that the epilogue is
881 // the last statement.
882 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
884 // Collect the instructions of all leaders. UnionFind's member iterator
885 // unfortunately are not in any specific order.
886 for (Instruction &Inst : reverse(*BB)) {
887 auto LeaderIt = UnionFind.findLeader(&Inst);
888 if (LeaderIt == UnionFind.member_end())
889 continue;
891 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
892 InstList.push_back(&Inst);
895 // Finally build the statements.
896 int Count = 0;
897 long BBIdx = scop->getNextStmtIdx();
898 bool MainFound = false;
899 for (auto &Instructions : reverse(LeaderToInstList)) {
900 std::vector<Instruction *> &InstList = Instructions.second;
902 // If there is no main instruction, make the first statement the main.
903 bool IsMain;
904 if (MainInst)
905 IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
906 InstList.end();
907 else
908 IsMain = (Count == 0);
909 if (IsMain)
910 MainFound = true;
912 std::reverse(InstList.begin(), InstList.end());
913 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
914 scop->addScopStmt(BB, Name, L, std::move(InstList));
915 Count += 1;
918 // Unconditionally add an epilogue (last statement). It contains no
919 // instructions, but holds the PHI write accesses for successor basic blocks,
920 // if the incoming value is not defined in another statement if the same BB.
921 // The epilogue will be removed if no PHIWrite is added to it.
922 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
923 scop->addScopStmt(BB, EpilogueName, L, {});
926 void ScopBuilder::buildStmts(Region &SR) {
927 if (scop->isNonAffineSubRegion(&SR)) {
928 std::vector<Instruction *> Instructions;
929 Loop *SurroundingLoop =
930 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
931 for (Instruction &Inst : *SR.getEntry())
932 if (shouldModelInst(&Inst, SurroundingLoop))
933 Instructions.push_back(&Inst);
934 long RIdx = scop->getNextStmtIdx();
935 std::string Name = makeStmtName(&SR, RIdx);
936 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
937 return;
940 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
941 if (I->isSubRegion())
942 buildStmts(*I->getNodeAs<Region>());
943 else {
944 BasicBlock *BB = I->getNodeAs<BasicBlock>();
945 switch (StmtGranularity) {
946 case GranularityChoice::BasicBlocks:
947 buildSequentialBlockStmts(BB);
948 break;
949 case GranularityChoice::ScalarIndependence:
950 buildEqivClassBlockStmts(BB);
951 break;
952 case GranularityChoice::Stores:
953 buildSequentialBlockStmts(BB, true);
954 break;
/// Create the memory accesses (array, scalar, and PHI accesses) for all
/// modeled instructions of basic block \p BB inside statement \p Stmt.
///
/// \param Stmt               The statement representing \p BB; must be
///                           non-null (the region exit block is the only
///                           block without a statement, and it is never
///                           passed here).
/// \param BB                 The basic block whose instructions are modeled.
/// \param NonAffineSubRegion The non-affine subregion \p BB belongs to, or
///                           nullptr if \p BB is modeled block-wise.
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  // Per-instruction worker: model PHI reads/writes, memory accesses, and
  // scalar (value) dependences. The order — PHI first, then memory, then
  // scalar dependences — is the modeling contract relied on below.
  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and TerminatorInsts that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    // The entry block's instruction list was already filtered when the
    // statement was built, so iterate the statement's own list.
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    // For region statements the terminator is not in the instruction list but
    // may still carry scalar uses that must be modeled.
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    // Non-entry blocks of a region statement: walk the IR directly, skipping
    // ignored intrinsics and invariant loads (the latter were handled before).
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}
1011 MemoryAccess *ScopBuilder::addMemoryAccess(
1012 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
1013 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
1014 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
1015 MemoryKind Kind) {
1016 bool isKnownMustAccess = false;
1018 // Accesses in single-basic block statements are always executed.
1019 if (Stmt->isBlockStmt())
1020 isKnownMustAccess = true;
1022 if (Stmt->isRegionStmt()) {
1023 // Accesses that dominate the exit block of a non-affine region are always
1024 // executed. In non-affine regions there may exist MemoryKind::Values that
1025 // do not dominate the exit. MemoryKind::Values will always dominate the
1026 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
1027 // non-affine region.
1028 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
1029 isKnownMustAccess = true;
1032 // Non-affine PHI writes do not "happen" at a particular instruction, but
1033 // after exiting the statement. Therefore they are guaranteed to execute and
1034 // overwrite the old value.
1035 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
1036 isKnownMustAccess = true;
1038 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
1039 AccType = MemoryAccess::MAY_WRITE;
1041 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
1042 Affine, Subscripts, Sizes, AccessValue, Kind);
1044 scop->addAccessFunction(Access);
1045 Stmt->addAccess(Access);
1046 return Access;
1049 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
1050 MemoryAccess::AccessType AccType,
1051 Value *BaseAddress, Type *ElementType,
1052 bool IsAffine,
1053 ArrayRef<const SCEV *> Subscripts,
1054 ArrayRef<const SCEV *> Sizes,
1055 Value *AccessValue) {
1056 ArrayBasePointers.insert(BaseAddress);
1057 auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
1058 ElementType, IsAffine, AccessValue,
1059 Subscripts, Sizes, MemoryKind::Array);
1061 if (!DetectFortranArrays)
1062 return;
1064 if (Value *FAD = findFADAllocationInvisible(MemAccInst))
1065 MemAccess->setFortranArrayDescriptor(FAD);
1066 else if (Value *FAD = findFADAllocationVisible(MemAccInst))
1067 MemAccess->setFortranArrayDescriptor(FAD);
1070 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
1071 // Find the statement that defines the value of Inst. That statement has to
1072 // write the value to make it available to those statements that read it.
1073 ScopStmt *Stmt = scop->getStmtFor(Inst);
1075 // It is possible that the value is synthesizable within a loop (such that it
1076 // is not part of any statement), but not after the loop (where you need the
1077 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1078 // avoid this. In case the IR has no such PHI, use the last statement (where
1079 // the value is synthesizable) to write the value.
1080 if (!Stmt)
1081 Stmt = scop->getLastStmtFor(Inst->getParent());
1083 // Inst not defined within this SCoP.
1084 if (!Stmt)
1085 return;
1087 // Do not process further if the instruction is already written.
1088 if (Stmt->lookupValueWriteOf(Inst))
1089 return;
1091 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
1092 true, Inst, ArrayRef<const SCEV *>(),
1093 ArrayRef<const SCEV *>(), MemoryKind::Value);
/// Ensure that \p UserStmt has a read access for value \p V if one is needed.
///
/// Depending on the kind of use (constant, synthesizable, intra-statement,
/// read-only, inter-statement, ...) a MemoryKind::Value read access is
/// created; inter-statement uses additionally force a value write in the
/// defining statement.
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    // Deliberate fallthrough: a modeled read-only use is handled exactly like
    // an inter-statement use below.
    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one already
    // exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
1141 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
1142 BasicBlock *IncomingBlock,
1143 Value *IncomingValue, bool IsExitBlock) {
1144 // As the incoming block might turn out to be an error statement ensure we
1145 // will create an exit PHI SAI object. It is needed during code generation
1146 // and would be created later anyway.
1147 if (IsExitBlock)
1148 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
1149 MemoryKind::ExitPHI);
1151 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1152 // from outside the SCoP's region have no statement representation.
1153 if (!IncomingStmt)
1154 return;
1156 // Take care for the incoming value being available in the incoming block.
1157 // This must be done before the check for multiple PHI writes because multiple
1158 // exiting edges from subregion each can be the effective written value of the
1159 // subregion. As such, all of them must be made available in the subregion
1160 // statement.
1161 ensureValueRead(IncomingValue, IncomingStmt);
1163 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1164 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
1165 assert(Acc->getAccessInstruction() == PHI);
1166 Acc->addIncoming(IncomingBlock, IncomingValue);
1167 return;
1170 MemoryAccess *Acc = addMemoryAccess(
1171 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
1172 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1173 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
1174 assert(Acc);
1175 Acc->addIncoming(IncomingBlock, IncomingValue);
1178 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
1179 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
1180 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1181 MemoryKind::PHI);
1184 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
1185 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
1187 Stmt.Domain = scop->getDomainConditions(&Stmt);
1188 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
1191 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
1192 isl::set Domain = Stmt.getDomain();
1193 BasicBlock *BB = Stmt.getEntryBlock();
1195 Loop *L = LI.getLoopFor(BB);
1197 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
1198 L = L->getParentLoop();
1200 SmallVector<llvm::Loop *, 8> Loops;
1202 while (L && Stmt.getParent()->getRegion().contains(L)) {
1203 Loops.push_back(L);
1204 L = L->getParentLoop();
1207 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
1210 /// Return the reduction type for a given binary operator.
1211 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
1212 const Instruction *Load) {
1213 if (!BinOp)
1214 return MemoryAccess::RT_NONE;
1215 switch (BinOp->getOpcode()) {
1216 case Instruction::FAdd:
1217 if (!BinOp->isFast())
1218 return MemoryAccess::RT_NONE;
1219 // Fall through
1220 case Instruction::Add:
1221 return MemoryAccess::RT_ADD;
1222 case Instruction::Or:
1223 return MemoryAccess::RT_BOR;
1224 case Instruction::Xor:
1225 return MemoryAccess::RT_BXOR;
1226 case Instruction::And:
1227 return MemoryAccess::RT_BAND;
1228 case Instruction::FMul:
1229 if (!BinOp->isFast())
1230 return MemoryAccess::RT_NONE;
1231 // Fall through
1232 case Instruction::Mul:
1233 if (DisableMultiplicativeReductions)
1234 return MemoryAccess::RT_NONE;
1235 return MemoryAccess::RT_MUL;
1236 default:
1237 return MemoryAccess::RT_NONE;
/// Detect load/store pairs in \p Stmt that form a reduction and mark the
/// corresponding accesses as reduction-like.
///
/// A candidate pair is valid only if no other access of the statement may
/// overlap the memory touched by the pair.
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check if the remaining for overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      // Only accesses into the same array space can overlap; any non-empty
      // intersection with the pair's footprint invalidates the candidate.
      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    // Load is non-null here: candidates produced by
    // collectCandidateReductionLoads always wrap a LoadInst.
    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
1302 void ScopBuilder::collectCandidateReductionLoads(
1303 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1304 ScopStmt *Stmt = StoreMA->getStatement();
1306 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1307 if (!Store)
1308 return;
1310 // Skip if there is not one binary operator between the load and the store
1311 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1312 if (!BinOp)
1313 return;
1315 // Skip if the binary operators has multiple uses
1316 if (BinOp->getNumUses() != 1)
1317 return;
1319 // Skip if the opcode of the binary operator is not commutative/associative
1320 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1321 return;
1323 // Skip if the binary operator is outside the current SCoP
1324 if (BinOp->getParent() != Store->getParent())
1325 return;
1327 // Skip if it is a multiplicative reduction and we disabled them
1328 if (DisableMultiplicativeReductions &&
1329 (BinOp->getOpcode() == Instruction::Mul ||
1330 BinOp->getOpcode() == Instruction::FMul))
1331 return;
1333 // Check the binary operator operands for a candidate load
1334 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1335 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1336 if (!PossibleLoad0 && !PossibleLoad1)
1337 return;
1339 // A load is only a candidate if it cannot escape (thus has only this use)
1340 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1341 if (PossibleLoad0->getParent() == Store->getParent())
1342 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1343 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1344 if (PossibleLoad1->getParent() == Store->getParent())
1345 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1348 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1349 for (MemoryAccess *Access : Stmt.MemAccs) {
1350 Type *ElementType = Access->getElementType();
1352 MemoryKind Ty;
1353 if (Access->isPHIKind())
1354 Ty = MemoryKind::PHI;
1355 else if (Access->isExitPHIKind())
1356 Ty = MemoryKind::ExitPHI;
1357 else if (Access->isValueKind())
1358 Ty = MemoryKind::Value;
1359 else
1360 Ty = MemoryKind::Array;
1362 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1363 ElementType, Access->Sizes, Ty);
1364 Access->buildAccessRelation(SAI);
1365 scop->addAccessData(Access);
1369 #ifndef NDEBUG
1370 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1371 auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1372 auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1373 assert(PhysUse.getKind() == VirtUse.getKind());
/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlocks instructions resides in) to be same as the "virtual"
/// kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
/// can intentionally influence on the kind of uses (not corresponding to the
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
/// to pick up the virtual uses. But here in the code generator, this has not
/// happened yet, such that virtual and physical uses are equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      // Instructions without a statement (e.g. in error blocks) have no
      // accesses to verify.
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It can
      // still be synthesizable or load-hoisted, but these kind of instructions
      // are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      // PHI nodes are grouped at the top of a block; stop at the first
      // non-PHI instruction.
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
1439 #endif
1441 /// Return the block that is the representing block for @p RN.
1442 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1443 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1444 : RN->getNodeAs<BasicBlock>();
/// Build the polyhedral Scop for region \p R.
///
/// Runs the full construction pipeline: statements, invariant-load accesses,
/// access functions, exit-block PHI modeling, domains, schedule, and all
/// context/assumption simplifications. The function returns early (leaving a
/// possibly infeasible scop behind) whenever a phase fails; the constructor
/// later checks hasFeasibleRuntimeContext() to dismiss such SCoPs.
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
                            OptimizationRemarkEmitter &ORE) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));

  buildStmts(R);

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some statement.
      // It is not important to which statement the MemoryAccess is added
      // because it will later be removed from the ScopStmt again. We chose the
      // first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  scop->buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  scop->buildSchedule(LI);

  scop->finalizeAccesses();

  scop->realignParams();
  scop->addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  scop->addRecordedAssumptions();

  scop->simplifyContexts();
  if (!scop->buildAliasChecks(AA)) {
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  scop->hoistInvariantLoads();
  scop->canonicalizeDynamicBasePtrs();
  scop->verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}
/// Construct a ScopBuilder: run the full SCoP construction for region \p R,
/// emit begin/end optimization remarks, update the pass statistics, and
/// dismiss (reset) the scop if its runtime context is infeasible.
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
  // Locate the debug locations delimiting the region for the remarks below.
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC, ORE);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    // The construction produced a SCoP that can never execute; drop it.
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  // A top-level region has no exit block; anchor the end remark accordingly.
  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}