[ScopBuilder] Introduce -polly-stmt-granularity option. NFC.
lib/Analysis/ScopBuilder.cpp (polly-mirror.git)
blob 88d9cc665dfac801692452dc0373fcaee283b83a
1 //===- ScopBuilder.cpp ----------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the SCoPs detected by the
13 // SCoP detection, derived from their LLVM-IR code.
15 //===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/SetVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Analysis/AliasAnalysis.h"
31 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
33 #include "llvm/Analysis/RegionInfo.h"
34 #include "llvm/Analysis/RegionIterator.h"
35 #include "llvm/Analysis/ScalarEvolution.h"
36 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/DebugLoc.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/DiagnosticInfo.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Operator.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/ErrorHandling.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include <cassert>
60 #include <string>
61 #include <tuple>
62 #include <vector>
64 using namespace llvm;
65 using namespace polly;
67 #define DEBUG_TYPE "polly-scops"
69 STATISTIC(ScopFound, "Number of valid Scops");
70 STATISTIC(RichScopFound, "Number of Scops containing a loop");
71 STATISTIC(InfeasibleScops,
72 "Number of SCoPs with statically infeasible context.");
74 bool polly::ModelReadOnlyScalars;
76 static cl::opt<bool, true> XModelReadOnlyScalars(
77 "polly-analyze-read-only-scalars",
78 cl::desc("Model read-only scalar values in the scop description"),
79 cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
80 cl::init(true), cl::cat(PollyCategory));
82 static cl::opt<bool> UnprofitableScalarAccs(
83 "polly-unprofitable-scalar-accs",
84 cl::desc("Count statements with scalar accesses as not optimizable"),
85 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
87 static cl::opt<bool> DetectFortranArrays(
88 "polly-detect-fortran-arrays",
89 cl::desc("Detect Fortran arrays and use this for code generation"),
90 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
92 static cl::opt<bool> DetectReductions("polly-detect-reductions",
93 cl::desc("Detect and exploit reductions"),
94 cl::Hidden, cl::ZeroOrMore,
95 cl::init(true), cl::cat(PollyCategory));
97 // Multiplicative reductions can be disabled separately as these kinds of
98 // operations can easily overflow. Additive reductions and bit operations
99 // are, in contrast, fairly stable.
100 static cl::opt<bool> DisableMultiplicativeReductions(
101 "polly-disable-multiplicative-reductions",
102 cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
103 cl::init(false), cl::cat(PollyCategory));
105 enum class GranularityChoice { BasicBlocks };
107 static cl::opt<GranularityChoice> StmtGranularity(
108 "polly-stmt-granularity",
109 cl::desc(
110 "Algorithm to use for splitting basic blocks into multiple statements"),
111 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
112 "One statement per basic block")),
113 cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
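// Usage sketch (hypothetical invocation; the exact flags depend on how Polly
// is loaded into opt):
//   opt -load LLVMPolly.so -polly-scops -polly-stmt-granularity=bb -analyze in.ll
// "bb" (one statement per basic block) is currently the only choice; the enum
// leaves room for finer-grained splitting algorithms to be added later.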
115 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
116 Region *NonAffineSubRegion,
117 bool IsExitBlock) {
118 // PHI nodes in the exit block of the region (i.e., if IsExitBlock is true)
119 // are not modeled as ordinary PHI nodes because they are not part of the
120 // region. However, we model their operands in the predecessor blocks that
121 // are part of the region as regular scalar accesses.
123 // If we can synthesize a PHI we can skip it, but only if it is inside the
124 // region. Otherwise it can only be in the exit block of the region, in
125 // which case we model the operands but not the PHI itself.
126 auto *Scope = LI.getLoopFor(PHI->getParent());
127 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
128 return;
130 // PHI nodes are modeled as if they had been demoted prior to the SCoP
131 // detection. Hence, the PHI is a load of a new memory location in which the
132 // incoming value was written at the end of the incoming basic block.
133 bool OnlyNonAffineSubRegionOperands = true;
134 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
135 Value *Op = PHI->getIncomingValue(u);
136 BasicBlock *OpBB = PHI->getIncomingBlock(u);
137 ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);
139 // Do not build PHI dependences inside a non-affine subregion, but make
140 // sure that the necessary scalar values are still made available.
141 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
142 auto *OpInst = dyn_cast<Instruction>(Op);
143 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
144 ensureValueRead(Op, OpStmt);
145 continue;
148 OnlyNonAffineSubRegionOperands = false;
149 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
152 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
153 addPHIReadAccess(PHIStmt, PHI);
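/// Ensure that the values used by the non-PHI instruction @p Inst are
/// available in @p UserStmt by requesting a scalar read for every operand;
/// ensureValueRead decides whether an actual MemoryAccess is needed.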
157 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
158 Instruction *Inst) {
159 assert(!isa<PHINode>(Inst));
161 // Pull-in required operands.
162 for (Use &Op : Inst->operands())
163 ensureValueRead(Op.get(), UserStmt);
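/// If @p Inst is used after the SCoP (e.g., outside the region or in its exit
/// block), make sure a scalar write of its value exists so that the value can
/// escape the region.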
166 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
167 // Check for uses of this instruction outside the scop. Because we do not
168 // iterate over such instructions and therefore did not "ensure" the existence
169 // of a write, we must detect such uses here.
170 if (scop->isEscaping(Inst))
171 ensureValueWrite(Inst);
174 /// Check that a value is a Fortran Array descriptor.
176 /// We check if V has the following structure:
177 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
178 /// [<num> x %struct.descriptor_dimension] }
181 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
183 /// 1. V's type name starts with "struct.array"
184 /// 2. V's type has layout as shown.
185 /// 3. Final member of V's type has name "struct.descriptor_dimension",
186 /// 4. "struct.descriptor_dimension" has layout as shown.
187 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
189 /// We are interested in such types since this is the code that dragonegg
190 /// generates for Fortran array descriptors.
192 /// @param V the Value to be checked.
194 /// @returns True if V is a Fortran array descriptor, False otherwise.
195 bool isFortranArrayDescriptor(Value *V) {
196 PointerType *PTy = dyn_cast<PointerType>(V->getType());
198 if (!PTy)
199 return false;
201 Type *Ty = PTy->getElementType();
202 assert(Ty && "Ty expected to be initialized");
203 auto *StructArrTy = dyn_cast<StructType>(Ty);
205 if (!(StructArrTy && StructArrTy->hasName()))
206 return false;
208 if (!StructArrTy->getName().startswith("struct.array"))
209 return false;
211 if (StructArrTy->getNumElements() != 4)
212 return false;
214 const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
216 // i8* match
217 if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
218 return false;
220 // Get a reference to the int type and check that all the members
221 // share the same int type
222 Type *IntTy = ArrMemberTys[1];
223 if (ArrMemberTys[2] != IntTy)
224 return false;
226 // type: [<num> x %struct.descriptor_dimension]
227 ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
228 if (!DescriptorDimArrayTy)
229 return false;
231 // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
232 StructType *DescriptorDimTy =
233 dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
235 if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
236 return false;
238 if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
239 return false;
241 if (DescriptorDimTy->getNumElements() != 3)
242 return false;
244 for (auto MemberTy : DescriptorDimTy->elements()) {
245 if (MemberTy != IntTy)
246 return false;
249 return true;
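/// Try to find the Fortran array descriptor behind a memory access whose
/// allocation is visible inside the SCoP. The numbered "match:" comments below
/// are an editorial summary of the IR pattern being matched: a call to
/// "malloc" (1), a bitcast of the returned memory (2), optionally a GEP into
/// the descriptor slot (3), an 8-byte aligned load or store (4), and a store
/// of the malloc'd buffer into a struct accepted by isFortranArrayDescriptor
/// (5).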
252 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
253 // match: 4.1 & 4.2 store/load
254 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
255 return nullptr;
257 // match: 4
258 if (Inst.getAlignment() != 8)
259 return nullptr;
261 Value *Address = Inst.getPointerOperand();
263 const BitCastInst *Bitcast = nullptr;
264 // [match: 3]
265 if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
266 Value *TypedMem = Slot->getPointerOperand();
267 // match: 2
268 Bitcast = dyn_cast<BitCastInst>(TypedMem);
269 } else {
270 // match: 2
271 Bitcast = dyn_cast<BitCastInst>(Address);
274 if (!Bitcast)
275 return nullptr;
277 auto *MallocMem = Bitcast->getOperand(0);
279 // match: 1
280 auto *MallocCall = dyn_cast<CallInst>(MallocMem);
281 if (!MallocCall)
282 return nullptr;
284 Function *MallocFn = MallocCall->getCalledFunction();
285 if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
286 return nullptr;
288 // Find all uses of the malloc'd memory.
289 // We are looking for a "store" into a struct whose type is the Fortran
290 // descriptor type.
291 for (auto user : MallocMem->users()) {
292 // match: 5
293 auto *MallocStore = dyn_cast<StoreInst>(user);
294 if (!MallocStore)
295 continue;
297 auto *DescriptorGEP =
298 dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
299 if (!DescriptorGEP)
300 continue;
302 // match: 5
303 auto DescriptorType =
304 dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
305 if (!(DescriptorType && DescriptorType->hasName()))
306 continue;
308 Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
310 if (!Descriptor)
311 continue;
313 if (!isFortranArrayDescriptor(Descriptor))
314 continue;
316 return Descriptor;
319 return nullptr;
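/// Try to find the Fortran array descriptor when the allocation itself is not
/// visible in the SCoP: the accessed slot is reached through a load (match: 1),
/// optionally via a GEP (match: 2), for the load/store being modeled
/// (match: 3). Editorial summary of the code below.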
322 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
323 // match: 3
324 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
325 return nullptr;
327 Value *Slot = Inst.getPointerOperand();
329 LoadInst *MemLoad = nullptr;
330 // [match: 2]
331 if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
332 // match: 1
333 MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
334 } else {
335 // match: 1
336 MemLoad = dyn_cast<LoadInst>(Slot);
339 if (!MemLoad)
340 return nullptr;
342 auto *BitcastOperator =
343 dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
344 if (!BitcastOperator)
345 return nullptr;
347 Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
348 if (!Descriptor)
349 return nullptr;
351 if (!isFortranArrayDescriptor(Descriptor))
352 return nullptr;
354 return Descriptor;
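/// Try to model @p Inst as a multi-dimensional array access with fixed,
/// compile-time constant dimension sizes derived from the index expressions of
/// the GEP computing the address. Returns false if no such access can be
/// built, so that a more general strategy may be tried.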
357 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
358 Value *Val = Inst.getValueOperand();
359 Type *ElementType = Val->getType();
360 Value *Address = Inst.getPointerOperand();
361 const SCEV *AccessFunction =
362 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
363 const SCEVUnknown *BasePointer =
364 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
365 enum MemoryAccess::AccessType AccType =
366 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
368 if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
369 auto *Src = BitCast->getOperand(0);
370 auto *SrcTy = Src->getType();
371 auto *DstTy = BitCast->getType();
372 // Do not try to delinearize non-sized (opaque) pointers.
373 if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
374 (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
375 return false;
377 if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
378 DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
379 DL.getTypeAllocSize(DstTy->getPointerElementType()))
380 Address = Src;
383 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
384 if (!GEP)
385 return false;
387 std::vector<const SCEV *> Subscripts;
388 std::vector<int> Sizes;
389 std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
390 auto *BasePtr = GEP->getOperand(0);
392 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
393 BasePtr = BasePtrCast->getOperand(0);
395 // Check for identical base pointers to ensure that we do not miss index
396 // offsets that have been added before this GEP is applied.
397 if (BasePtr != BasePointer->getValue())
398 return false;
400 std::vector<const SCEV *> SizesSCEV;
402 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
404 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
405 for (auto *Subscript : Subscripts) {
406 InvariantLoadsSetTy AccessILS;
407 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
408 &AccessILS))
409 return false;
411 for (LoadInst *LInst : AccessILS)
412 if (!ScopRIL.count(LInst))
413 return false;
416 if (Sizes.empty())
417 return false;
419 SizesSCEV.push_back(nullptr);
421 for (auto V : Sizes)
422 SizesSCEV.push_back(SE.getSCEV(
423 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
425 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
426 true, Subscripts, SizesSCEV, Val);
427 return true;
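/// Try to model @p Inst as a multi-dimensional array access with parametric
/// dimension sizes, using the delinearization results recorded in the SCoP's
/// instruction-to-access map. Only active if -polly-delinearize is enabled.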
430 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
431 if (!PollyDelinearize)
432 return false;
434 Value *Address = Inst.getPointerOperand();
435 Value *Val = Inst.getValueOperand();
436 Type *ElementType = Val->getType();
437 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
438 enum MemoryAccess::AccessType AccType =
439 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
441 const SCEV *AccessFunction =
442 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
443 const SCEVUnknown *BasePointer =
444 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
446 assert(BasePointer && "Could not find base pointer");
448 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
449 auto AccItr = InsnToMemAcc.find(Inst);
450 if (AccItr == InsnToMemAcc.end())
451 return false;
453 std::vector<const SCEV *> Sizes = {nullptr};
455 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
456 AccItr->second.Shape->DelinearizedSizes.end());
458 // In case only the element size is contained in the 'Sizes' array, the
459 // access does not access a real multi-dimensional array. Hence, we allow
460 // the normal single-dimensional access construction to handle this.
461 if (Sizes.size() == 1)
462 return false;
464 // Remove the element size. This information is already provided by the
465 // ElementSize parameter. In case the element size of this access and the
466 // element size used for delinearization differ, the delinearization is
467 // incorrect. Hence, we invalidate the scop.
469 // TODO: Handle delinearization with differing element sizes.
470 auto DelinearizedSize =
471 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
472 Sizes.pop_back();
473 if (ElementSize != DelinearizedSize)
474 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
476 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
477 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
478 return true;
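/// Model memset/memcpy/memmove intrinsics as accesses to byte arrays at the
/// destination (and, for memory transfers, the source) base pointer, using the
/// length argument as the possibly overapproximated extent.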
481 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
482 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
484 if (MemIntr == nullptr)
485 return false;
487 auto *L = LI.getLoopFor(Inst->getParent());
488 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
489 assert(LengthVal);
491 // Check if the length value is actually affine or if we overapproximate it.
492 InvariantLoadsSetTy AccessILS;
493 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
495 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
496 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
497 LengthVal, SE, &AccessILS);
498 for (LoadInst *LInst : AccessILS)
499 if (!ScopRIL.count(LInst))
500 LengthIsAffine = false;
501 if (!LengthIsAffine)
502 LengthVal = nullptr;
504 auto *DestPtrVal = MemIntr->getDest();
505 assert(DestPtrVal);
507 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
508 assert(DestAccFunc);
509 // Ignore accesses to "NULL".
510 // TODO: We could use this to optimize the region further, e.g., intersect
511 // the context with
512 // isl_set_complement(isl_set_params(getDomain()))
513 // as we know it would be undefined to execute this instruction anyway.
514 if (DestAccFunc->isZero())
515 return true;
517 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
518 assert(DestPtrSCEV);
519 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
520 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
521 IntegerType::getInt8Ty(DestPtrVal->getContext()),
522 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
523 Inst.getValueOperand());
525 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
526 if (!MemTrans)
527 return true;
529 auto *SrcPtrVal = MemTrans->getSource();
530 assert(SrcPtrVal);
532 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
533 assert(SrcAccFunc);
534 // Ignore accesses to "NULL".
535 // TODO: See above TODO
536 if (SrcAccFunc->isZero())
537 return true;
539 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
540 assert(SrcPtrSCEV);
541 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
542 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
543 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
544 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
545 Inst.getValueOperand());
547 return true;
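/// Model the memory effects of a call instruction from alias-analysis mod/ref
/// information: read-only calls are queued for later global-read modeling,
/// argument-pointee accesses become per-argument (may-)accesses, and mod/ref
/// behaviors not representable here make the builder return false.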
550 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
551 auto *CI = dyn_cast_or_null<CallInst>(Inst);
553 if (CI == nullptr)
554 return false;
556 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
557 return true;
559 bool ReadOnly = false;
560 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
561 auto *CalledFunction = CI->getCalledFunction();
562 switch (AA.getModRefBehavior(CalledFunction)) {
563 case FMRB_UnknownModRefBehavior:
564 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
565 case FMRB_DoesNotAccessMemory:
566 return true;
567 case FMRB_DoesNotReadMemory:
568 case FMRB_OnlyAccessesInaccessibleMem:
569 case FMRB_OnlyAccessesInaccessibleOrArgMem:
570 return false;
571 case FMRB_OnlyReadsMemory:
572 GlobalReads.emplace_back(Stmt, CI);
573 return true;
574 case FMRB_OnlyReadsArgumentPointees:
575 ReadOnly = true;
576 // Fall through
577 case FMRB_OnlyAccessesArgumentPointees: {
578 auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
579 Loop *L = LI.getLoopFor(Inst->getParent());
580 for (const auto &Arg : CI->arg_operands()) {
581 if (!Arg->getType()->isPointerTy())
582 continue;
584 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
585 if (ArgSCEV->isZero())
586 continue;
588 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
589 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
590 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
592 return true;
596 return true;
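/// Fallback path: model @p Inst as a single-dimensional access relative to its
/// base pointer. If the access function turns out to be non-affine, a
/// MUST_WRITE is demoted to a MAY_WRITE.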
599 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
600 Value *Address = Inst.getPointerOperand();
601 Value *Val = Inst.getValueOperand();
602 Type *ElementType = Val->getType();
603 enum MemoryAccess::AccessType AccType =
604 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
606 const SCEV *AccessFunction =
607 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
608 const SCEVUnknown *BasePointer =
609 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
611 assert(BasePointer && "Could not find base pointer");
612 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
614 // Check if the access depends on a loop contained in a non-affine subregion.
615 bool isVariantInNonAffineLoop = false;
616 SetVector<const Loop *> Loops;
617 findLoops(AccessFunction, Loops);
618 for (const Loop *L : Loops)
619 if (Stmt->contains(L)) {
620 isVariantInNonAffineLoop = true;
621 break;
624 InvariantLoadsSetTy AccessILS;
626 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
627 bool IsAffine = !isVariantInNonAffineLoop &&
628 isAffineExpr(&scop->getRegion(), SurroundingLoop,
629 AccessFunction, SE, &AccessILS);
631 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
632 for (LoadInst *LInst : AccessILS)
633 if (!ScopRIL.count(LInst))
634 IsAffine = false;
636 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
637 AccType = MemoryAccess::MAY_WRITE;
639 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
640 IsAffine, {AccessFunction}, {nullptr}, Val);
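/// Dispatch the construction of a memory access, trying the most specific
/// builders first (memory intrinsics, calls, fixed-size and then parametric
/// multi-dimensional accesses) before falling back to a single-dimensional
/// access.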
643 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
644 if (buildAccessMemIntrinsic(Inst, Stmt))
645 return;
647 if (buildAccessCallInst(Inst, Stmt))
648 return;
650 if (buildAccessMultiDimFixed(Inst, Stmt))
651 return;
653 if (buildAccessMultiDimParam(Inst, Stmt))
654 return;
656 buildAccessSingleDim(Inst, Stmt);
659 void ScopBuilder::buildAccessFunctions() {
660 for (auto &Stmt : *scop) {
661 if (Stmt.isBlockStmt()) {
662 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
663 continue;
666 Region *R = Stmt.getRegion();
667 for (BasicBlock *BB : R->blocks())
668 buildAccessFunctions(&Stmt, *BB, R);
671 // Build write accesses for values that are used after the SCoP.
672 // The instructions defining them might be synthesizable and therefore not
673 // contained in any statement, hence we iterate over the original instructions
674 // to identify all escaping values.
675 for (BasicBlock *BB : scop->getRegion().blocks()) {
676 for (Instruction &Inst : *BB)
677 buildEscapingDependences(&Inst);
681 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
682 return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
683 !canSynthesize(Inst, *scop, &SE, L);
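/// Create the ScopStmts for @p BB under the "bb" granularity: normally one
/// statement per basic block, but an instruction carrying "polly_split_after"
/// metadata ends the current statement and starts a new one. A hypothetical IR
/// sketch of such a split point (metadata id chosen for illustration only):
///
///   store i32 %v, i32* %p, !polly_split_after !0
///   ...
///   !0 = !{}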
686 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB) {
687 Loop *SurroundingLoop = LI.getLoopFor(BB);
689 int Count = 0;
690 std::vector<Instruction *> Instructions;
691 for (Instruction &Inst : *BB) {
692 if (shouldModelInst(&Inst, SurroundingLoop))
693 Instructions.push_back(&Inst);
694 if (Inst.getMetadata("polly_split_after")) {
695 scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
696 Count++;
697 Instructions.clear();
701 scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
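/// Build the statements for region @p SR: a non-affine subregion becomes a
/// single region statement, while every other basic block is split into
/// statements according to the algorithm selected by -polly-stmt-granularity.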
704 void ScopBuilder::buildStmts(Region &SR) {
705 if (scop->isNonAffineSubRegion(&SR)) {
706 std::vector<Instruction *> Instructions;
707 Loop *SurroundingLoop =
708 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
709 for (Instruction &Inst : *SR.getEntry())
710 if (shouldModelInst(&Inst, SurroundingLoop))
711 Instructions.push_back(&Inst);
712 scop->addScopStmt(&SR, SurroundingLoop, Instructions);
713 return;
716 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
717 if (I->isSubRegion())
718 buildStmts(*I->getNodeAs<Region>());
719 else {
720 BasicBlock *BB = I->getNodeAs<BasicBlock>();
721 switch (StmtGranularity) {
722 case GranularityChoice::BasicBlocks:
723 buildSequentialBlockStmts(BB);
724 break;
729 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
730 Region *NonAffineSubRegion) {
731 assert(
732 Stmt &&
733 "The exit BB is the only one that cannot be represented by a statement");
734 assert(Stmt->represents(&BB));
736 // We do not build access functions for error blocks, as they may contain
737 // instructions we can not model.
738 if (isErrorBlock(BB, scop->getRegion(), LI, DT))
739 return;
741 auto BuildAccessesForInst = [this, Stmt,
742 NonAffineSubRegion](Instruction *Inst) {
743 PHINode *PHI = dyn_cast<PHINode>(Inst);
744 if (PHI)
745 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
747 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
748 assert(Stmt && "Cannot build access function in non-existing statement");
749 buildMemoryAccess(MemInst, Stmt);
752 // PHI nodes have already been modeled above and TerminatorInsts that are
753 // not part of a non-affine subregion are fully modeled and regenerated
754 // from the polyhedral domains. Hence, they do not need to be modeled as
755 // explicit data dependences.
756 if (!PHI)
757 buildScalarDependences(Stmt, Inst);
760 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
761 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
762 if (IsEntryBlock) {
763 for (Instruction *Inst : Stmt->getInstructions())
764 BuildAccessesForInst(Inst);
765 if (Stmt->isRegionStmt())
766 BuildAccessesForInst(BB.getTerminator());
767 } else {
768 for (Instruction &Inst : BB) {
769 if (isIgnoredIntrinsic(&Inst))
770 continue;
772 // Invariant loads already have been processed.
773 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
774 continue;
776 BuildAccessesForInst(&Inst);
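/// Create a MemoryAccess and register it with the SCoP and its statement. A
/// requested MUST_WRITE is demoted to a MAY_WRITE unless the access is known
/// to execute whenever the statement executes (see the conditions below).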
781 MemoryAccess *ScopBuilder::addMemoryAccess(
782 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
783 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
784 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
785 MemoryKind Kind) {
786 bool isKnownMustAccess = false;
788 // Accesses in single-basic block statements are always executed.
789 if (Stmt->isBlockStmt())
790 isKnownMustAccess = true;
792 if (Stmt->isRegionStmt()) {
793 // Accesses that dominate the exit block of a non-affine region are always
794 // executed. In non-affine regions there may exist MemoryKind::Values that
795 // do not dominate the exit. MemoryKind::Values will always dominate the
796 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
797 // non-affine region.
798 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
799 isKnownMustAccess = true;
802 // Non-affine PHI writes do not "happen" at a particular instruction, but
803 // after exiting the statement. Therefore they are guaranteed to execute and
804 // overwrite the old value.
805 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
806 isKnownMustAccess = true;
808 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
809 AccType = MemoryAccess::MAY_WRITE;
811 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
812 Affine, Subscripts, Sizes, AccessValue, Kind);
814 scop->addAccessFunction(Access);
815 Stmt->addAccess(Access);
816 return Access;
819 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
820 MemoryAccess::AccessType AccType,
821 Value *BaseAddress, Type *ElementType,
822 bool IsAffine,
823 ArrayRef<const SCEV *> Subscripts,
824 ArrayRef<const SCEV *> Sizes,
825 Value *AccessValue) {
826 ArrayBasePointers.insert(BaseAddress);
827 auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
828 ElementType, IsAffine, AccessValue,
829 Subscripts, Sizes, MemoryKind::Array);
831 if (!DetectFortranArrays)
832 return;
834 if (Value *FAD = findFADAllocationInvisible(MemAccInst))
835 MemAccess->setFortranArrayDescriptor(FAD);
836 else if (Value *FAD = findFADAllocationVisible(MemAccInst))
837 MemAccess->setFortranArrayDescriptor(FAD);
840 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
841 // Find the statement that defines the value of Inst. That statement has to
842 // write the value to make it available to those statements that read it.
843 ScopStmt *Stmt = scop->getStmtFor(Inst);
845 // It is possible that the value is synthesizable within a loop (such that it
846 // is not part of any statement), but not after the loop (where you need the
847 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
848 // avoid this. In case the IR has no such PHI, use the last statement (where
849 // the value is synthesizable) to write the value.
850 if (!Stmt)
851 Stmt = scop->getLastStmtFor(Inst->getParent());
853 // Inst not defined within this SCoP.
854 if (!Stmt)
855 return;
857 // Do not process further if the instruction is already written.
858 if (Stmt->lookupValueWriteOf(Inst))
859 return;
861 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
862 true, Inst, ArrayRef<const SCEV *>(),
863 ArrayRef<const SCEV *>(), MemoryKind::Value);
866 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
867 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
868 // to be able to replace this one. Currently, there is a split responsibility.
869 // In a first step, the MemoryAccess is created, but without the
870 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
871 // AccessRelation is created. At least for scalar accesses, there is no new
872 // information available at ScopStmt::buildAccessRelations(), so we could
873 // create the AccessRelation right away. This is what
874 // ScopStmt::ensureValueRead(Value*) does.
876 auto *Scope = UserStmt->getSurroundingLoop();
877 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
878 switch (VUse.getKind()) {
879 case VirtualUse::Constant:
880 case VirtualUse::Block:
881 case VirtualUse::Synthesizable:
882 case VirtualUse::Hoisted:
883 case VirtualUse::Intra:
884 // Uses of these kinds do not need a MemoryAccess.
885 break;
887 case VirtualUse::ReadOnly:
888 // Add MemoryAccess for invariant values only if requested.
889 if (!ModelReadOnlyScalars)
890 break;
892 LLVM_FALLTHROUGH;
893 case VirtualUse::Inter:
895 // Do not create another MemoryAccess for reloading the value if one already
896 // exists.
897 if (UserStmt->lookupValueReadOf(V))
898 break;
900 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
901 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
902 MemoryKind::Value);
904 // Inter-statement uses need to write the value in their defining statement.
905 if (VUse.isInter())
906 ensureValueWrite(cast<Instruction>(V));
907 break;
911 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
912 BasicBlock *IncomingBlock,
913 Value *IncomingValue, bool IsExitBlock) {
914 // As the incoming block might turn out to be an error statement, ensure we
915 // will create an exit PHI SAI object. It is needed during code generation
916 // and would be created later anyway.
917 if (IsExitBlock)
918 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
919 MemoryKind::ExitPHI);
921 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
922 // from outside the SCoP's region have no statement representation.
923 if (!IncomingStmt)
924 return;
926 // Make sure the incoming value is available in the incoming block. This must
927 // be done before the check for multiple PHI writes because multiple exiting
928 // edges from a subregion can each carry the effective written value of the
929 // subregion. As such, all of them must be made available in the subregion
930 // statement.
931 ensureValueRead(IncomingValue, IncomingStmt);
933 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
934 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
935 assert(Acc->getAccessInstruction() == PHI);
936 Acc->addIncoming(IncomingBlock, IncomingValue);
937 return;
940 MemoryAccess *Acc = addMemoryAccess(
941 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
942 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
943 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
944 assert(Acc);
945 Acc->addIncoming(IncomingBlock, IncomingValue);
948 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
949 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
950 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
951 MemoryKind::PHI);
954 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
955 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
957 Stmt.Domain = scop->getDomainConditions(&Stmt);
958 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
961 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
962 isl::set Domain = Stmt.getDomain();
963 for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
964 isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
965 Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
969 /// Return the reduction type for a given binary operator.
970 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
971 const Instruction *Load) {
972 if (!BinOp)
973 return MemoryAccess::RT_NONE;
974 switch (BinOp->getOpcode()) {
975 case Instruction::FAdd:
976 if (!BinOp->hasUnsafeAlgebra())
977 return MemoryAccess::RT_NONE;
978 // Fall through
979 case Instruction::Add:
980 return MemoryAccess::RT_ADD;
981 case Instruction::Or:
982 return MemoryAccess::RT_BOR;
983 case Instruction::Xor:
984 return MemoryAccess::RT_BXOR;
985 case Instruction::And:
986 return MemoryAccess::RT_BAND;
987 case Instruction::FMul:
988 if (!BinOp->hasUnsafeAlgebra())
989 return MemoryAccess::RT_NONE;
990 // Fall through
991 case Instruction::Mul:
992 if (DisableMultiplicativeReductions)
993 return MemoryAccess::RT_NONE;
994 return MemoryAccess::RT_MUL;
995 default:
996 return MemoryAccess::RT_NONE;
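/// Detect load/store pairs in @p Stmt that behave like a reduction. A typical
/// case, as a C-level sketch for illustration:
///
///   for (int i = 0; i < N; i++)
///     for (int j = 0; j < M; j++)
///       A[i] = A[i] + B[i][j];
///
/// The load of A[i] and the store to A[i] form an RT_ADD reduction pair, which
/// is accepted only if no other access in the statement may overlap A[i].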
1000 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1001 SmallVector<MemoryAccess *, 2> Loads;
1002 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1004 // First collect candidate load-store reduction chains by iterating over all
1005 // stores and collecting possible reduction loads.
1006 for (MemoryAccess *StoreMA : Stmt) {
1007 if (StoreMA->isRead())
1008 continue;
1010 Loads.clear();
1011 collectCandidateReductionLoads(StoreMA, Loads);
1012 for (MemoryAccess *LoadMA : Loads)
1013 Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1016 // Then check each possible candidate pair.
1017 for (const auto &CandidatePair : Candidates) {
1018 bool Valid = true;
1019 isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1020 isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1022 // Skip those with obviously unequal base addresses.
1023 if (!LoadAccs.has_equal_space(StoreAccs)) {
1024 continue;
1027 // Then check the remaining candidates for overlap with other memory accesses.
1028 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1029 AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1030 isl::set AllAccs = AllAccsRel.range();
1032 for (MemoryAccess *MA : Stmt) {
1033 if (MA == CandidatePair.first || MA == CandidatePair.second)
1034 continue;
1036 isl::map AccRel =
1037 MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1038 isl::set Accs = AccRel.range();
1040 if (AllAccs.has_equal_space(Accs)) {
1041 isl::set OverlapAccs = Accs.intersect(AllAccs);
1042 Valid = Valid && OverlapAccs.is_empty();
1046 if (!Valid)
1047 continue;
1049 const LoadInst *Load =
1050 dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1051 MemoryAccess::ReductionType RT =
1052 getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1054 // If no overlapping access was found we mark the load and the store as
1055 // reduction-like.
1056 CandidatePair.first->markAsReductionLike(RT);
1057 CandidatePair.second->markAsReductionLike(RT);
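/// Collect the loads that could feed a reduction ending in @p StoreMA: the
/// stored value must be a single-use, commutative and associative binary
/// operator in the same block as the store, and each candidate load must be
/// one of its operands, have only that single use, and reside in that block.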
1061 void ScopBuilder::collectCandidateReductionLoads(
1062 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1063 ScopStmt *Stmt = StoreMA->getStatement();
1065 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1066 if (!Store)
1067 return;
1069 // Skip unless there is exactly one binary operator between the load and the store.
1070 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1071 if (!BinOp)
1072 return;
1074 // Skip if the binary operator has multiple uses.
1075 if (BinOp->getNumUses() != 1)
1076 return;
1078 // Skip if the binary operator is not commutative and associative.
1079 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1080 return;
1082 // Skip if the binary operator is not in the same basic block as the store.
1083 if (BinOp->getParent() != Store->getParent())
1084 return;
1086 // Skip if it is a multiplicative reduction and we disabled them
1087 if (DisableMultiplicativeReductions &&
1088 (BinOp->getOpcode() == Instruction::Mul ||
1089 BinOp->getOpcode() == Instruction::FMul))
1090 return;
1092 // Check the binary operator operands for a candidate load
1093 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1094 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1095 if (!PossibleLoad0 && !PossibleLoad1)
1096 return;
1098 // A load is only a candidate if it cannot escape (thus has only this use)
1099 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1100 if (PossibleLoad0->getParent() == Store->getParent())
1101 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1102 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1103 if (PossibleLoad1->getParent() == Store->getParent())
1104 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1107 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1108 for (MemoryAccess *Access : Stmt.MemAccs) {
1109 Type *ElementType = Access->getElementType();
1111 MemoryKind Ty;
1112 if (Access->isPHIKind())
1113 Ty = MemoryKind::PHI;
1114 else if (Access->isExitPHIKind())
1115 Ty = MemoryKind::ExitPHI;
1116 else if (Access->isValueKind())
1117 Ty = MemoryKind::Value;
1118 else
1119 Ty = MemoryKind::Array;
1121 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1122 ElementType, Access->Sizes, Ty);
1123 Access->buildAccessRelation(SAI);
1124 scop->addAccessData(Access);
1128 #ifndef NDEBUG
1129 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1130 auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1131 auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1132 assert(PhysUse.getKind() == VirtUse.getKind());
1135 /// Check the consistency of every statement's MemoryAccesses.
1137 /// The check is carried out by expecting the "physical" kind of use (derived
1138 /// from the BasicBlock the instruction resides in) to be the same as the
1139 /// "virtual" kind of use (derived from a statement's MemoryAccess).
1141 /// The "physical" uses are taken by ensureValueRead to determine whether to
1142 /// create MemoryAccesses. When done, the kind of scalar access should be the
1143 /// same no matter which way it was derived.
1145 /// The MemoryAccesses might be changed by later SCoP-modifying passes and
1146 /// hence can intentionally influence the kind of uses (no longer corresponding
1147 /// to the "physical" ones, hence called "virtual"). The CodeGenerator therefore
1148 /// has to pick up the virtual uses. But here in the ScopBuilder, this has not
1149 /// happened yet, so virtual and physical uses are equivalent.
1150 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1151 for (auto *BB : S->getRegion().blocks()) {
1152 for (auto &Inst : *BB) {
1153 auto *Stmt = S->getStmtFor(&Inst);
1154 if (!Stmt)
1155 continue;
1157 if (isIgnoredIntrinsic(&Inst))
1158 continue;
1160 // Branch conditions are encoded in the statement domains.
1161 if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
1162 continue;
1164 // Verify all uses.
1165 for (auto &Op : Inst.operands())
1166 verifyUse(S, Op, LI);
1168 // Stores do not produce values used by other statements.
1169 if (isa<StoreInst>(Inst))
1170 continue;
1172 // For every value defined in the block, also check that a use of that
1173 // value in the same statement would not be an inter-statement use. It can
1174 // still be synthesizable or load-hoisted, but these kind of instructions
1175 // are not directly copied in code-generation.
1176 auto VirtDef =
1177 VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1178 assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1179 VirtDef.getKind() == VirtualUse::Intra ||
1180 VirtDef.getKind() == VirtualUse::Hoisted);
1184 if (S->hasSingleExitEdge())
1185 return;
1187 // PHINodes in the SCoP region's exit block are also uses to be checked.
1188 if (!S->getRegion().isTopLevelRegion()) {
1189 for (auto &Inst : *S->getRegion().getExit()) {
1190 if (!isa<PHINode>(Inst))
1191 break;
1193 for (auto &Op : Inst.operands())
1194 verifyUse(S, Op, LI);
1198 #endif
1200 /// Return the block that is the representing block for @p RN.
1201 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1202 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1203 : RN->getNodeAs<BasicBlock>();
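/// Drive the SCoP construction: build statements and access functions, then
/// domains, schedules and the various contexts, bailing out early whenever the
/// region turns out to be infeasible, empty or unprofitable.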
1206 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1207 OptimizationRemarkEmitter &ORE) {
1208 scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1210 buildStmts(R);
1212 // Create memory accesses for all invariant loads first. These loads are
1213 // categorized as 'synthesizable' and therefore are not part of any ScopStmt,
1214 // but their accesses still need to be created somewhere.
1215 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1216 for (BasicBlock *BB : scop->getRegion().blocks()) {
1217 if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1218 continue;
1220 for (Instruction &Inst : *BB) {
1221 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1222 if (!Load)
1223 continue;
1225 if (!RIL.count(Load))
1226 continue;
1228 // Invariant loads require a MemoryAccess to be created in some statement.
1229 // It is not important to which statement the MemoryAccess is added
1230 // because it will later be removed from the ScopStmt again. We choose the
1231 // first statement of the basic block the LoadInst is in.
1232 ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1233 assert(!List.empty());
1234 ScopStmt *RILStmt = List.front();
1235 buildMemoryAccess(Load, RILStmt);
1238 buildAccessFunctions();
1240 // In case the region does not have an exiting block, we will later (during
1241 // code generation) split the exit block. This will move potential PHI nodes
1242 // from the current exit block into the new region-exiting block. Hence, PHI
1243 // nodes that at this point are not part of the region will then become part
1244 // of it. To handle these PHI nodes later, we now model their operands as
1245 // scalar accesses. Note that we do not model anything in the exit block if we
1246 // have an exiting block in the region, as there will not be any splitting later.
1247 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1248 for (Instruction &Inst : *R.getExit()) {
1249 PHINode *PHI = dyn_cast<PHINode>(&Inst);
1250 if (!PHI)
1251 break;
1253 buildPHIAccesses(nullptr, PHI, nullptr, true);
1257 // Create memory accesses for global reads since all arrays are now known.
1258 auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1259 for (auto GlobalReadPair : GlobalReads) {
1260 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1261 Instruction *GlobalRead = GlobalReadPair.second;
1262 for (auto *BP : ArrayBasePointers)
1263 addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1264 BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1267 scop->buildInvariantEquivalenceClasses();
1269 /// A map from basic blocks to their invalid domains.
1270 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1272 if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1273 DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1274 return;
1277 scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1279 // Initialize the invalid domain.
1280 for (ScopStmt &Stmt : scop->Stmts)
1281 if (Stmt.isBlockStmt())
1282 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1283 else
1284 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1285 Stmt.getRegion()->getNode())]);
1287 // Remove empty statements.
1288 // Exit early in case there are no executable statements left in this scop.
1289 scop->removeStmtNotInDomainMap();
1290 scop->simplifySCoP(false);
1291 if (scop->isEmpty()) {
1292 DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1293 return;
1296 // The ScopStmts now have enough information to initialize themselves.
1297 for (ScopStmt &Stmt : *scop) {
1298 buildDomain(Stmt);
1299 collectSurroundingLoops(Stmt);
1300 buildAccessRelations(Stmt);
1302 if (DetectReductions)
1303 checkForReductions(Stmt);
1306 // Check early for a feasible runtime context.
1307 if (!scop->hasFeasibleRuntimeContext()) {
1308 DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1309 return;
1312 // Check early for profitability. Afterwards it cannot change anymore,
1313 // only the runtime context could become infeasible.
1314 if (!scop->isProfitable(UnprofitableScalarAccs)) {
1315 scop->invalidate(PROFITABLE, DebugLoc());
1316 DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1317 return;
1320 scop->buildSchedule(LI);
1322 scop->finalizeAccesses();
1324 scop->realignParams();
1325 scop->addUserContext();
1327 // After the context has been fully constructed, and thus all our knowledge
1328 // about the parameters is available, we add all recorded assumptions to the
1329 // assumed/invalid context.
1330 scop->addRecordedAssumptions();
1332 scop->simplifyContexts();
1333 if (!scop->buildAliasChecks(AA)) {
1334 DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1335 return;
1338 scop->hoistInvariantLoads();
1339 scop->canonicalizeDynamicBasePtrs();
1340 scop->verifyInvariantLoads();
1341 scop->simplifySCoP(true);
1343 // Check late for a feasible runtime context; profitability does not need to
1344 // be re-checked because it can no longer change.
1345 if (!scop->hasFeasibleRuntimeContext()) {
1346 DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1347 return;
1350 #ifndef NDEBUG
1351 verifyUses(scop.get(), LI, DT);
1352 #endif
1355 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1356 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1357 ScopDetection &SD, ScalarEvolution &SE,
1358 OptimizationRemarkEmitter &ORE)
1359 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1360 DebugLoc Beg, End;
1361 auto P = getBBPairForRegion(R);
1362 getDebugLocations(P, Beg, End);
1364 std::string Msg = "SCoP begins here.";
1365 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1366 << Msg);
1368 buildScop(*R, AC, ORE);
1370 DEBUG(dbgs() << *scop);
1372 if (!scop->hasFeasibleRuntimeContext()) {
1373 InfeasibleScops++;
1374 Msg = "SCoP ends here but was dismissed.";
1375 DEBUG(dbgs() << "SCoP detected but dismissed\n");
1376 scop.reset();
1377 } else {
1378 Msg = "SCoP ends here.";
1379 ++ScopFound;
1380 if (scop->getMaxLoopDepth() > 0)
1381 ++RichScopFound;
1384 if (R->isTopLevelRegion())
1385 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1386 << Msg);
1387 else
1388 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1389 << Msg);