[ScopBuilder] Split statements on encountering store instructions.
[polly-mirror.git] / lib/Analysis/ScopBuilder.cpp
blob 3c7f67be0ece4be11dc82debfd996255d178e874
1 //===- ScopBuilder.cpp ----------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the SCoPs detected by the SCoP
13 // detection, derived from their LLVM-IR code.
15 //===----------------------------------------------------------------------===//
17 #include "polly/ScopBuilder.h"
18 #include "polly/Options.h"
19 #include "polly/ScopDetection.h"
20 #include "polly/ScopDetectionDiagnostic.h"
21 #include "polly/ScopInfo.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/EquivalenceClasses.h"
29 #include "llvm/ADT/SetVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
34 #include "llvm/Analysis/RegionInfo.h"
35 #include "llvm/Analysis/RegionIterator.h"
36 #include "llvm/Analysis/ScalarEvolution.h"
37 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DataLayout.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/DiagnosticInfo.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Operator.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Use.h"
53 #include "llvm/IR/Value.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/ErrorHandling.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <cassert>
61 #include <string>
62 #include <tuple>
63 #include <vector>
65 using namespace llvm;
66 using namespace polly;
68 #define DEBUG_TYPE "polly-scops"
70 STATISTIC(ScopFound, "Number of valid SCoPs");
71 STATISTIC(RichScopFound, "Number of SCoPs containing a loop");
72 STATISTIC(InfeasibleScops,
73 "Number of SCoPs with statically infeasible context.");
75 bool polly::ModelReadOnlyScalars;
77 static cl::opt<bool, true> XModelReadOnlyScalars(
78 "polly-analyze-read-only-scalars",
79 cl::desc("Model read-only scalar values in the scop description"),
80 cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
81 cl::init(true), cl::cat(PollyCategory));
83 static cl::opt<bool> UnprofitableScalarAccs(
84 "polly-unprofitable-scalar-accs",
85 cl::desc("Count statements with scalar accesses as not optimizable"),
86 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
88 static cl::opt<bool> DetectFortranArrays(
89 "polly-detect-fortran-arrays",
90 cl::desc("Detect Fortran arrays and use this for code generation"),
91 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
93 static cl::opt<bool> DetectReductions("polly-detect-reductions",
94 cl::desc("Detect and exploit reductions"),
95 cl::Hidden, cl::ZeroOrMore,
96 cl::init(true), cl::cat(PollyCategory));
98 // Multiplicative reductions can be disabled separately as these kinds of
99 // operations can overflow easily. Additive reductions and bit operations
100 // are, in contrast, comparatively stable.
101 static cl::opt<bool> DisableMultiplicativeReductions(
102 "polly-disable-multiplicative-reductions",
103 cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
104 cl::init(false), cl::cat(PollyCategory));
106 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
108 static cl::opt<GranularityChoice> StmtGranularity(
109 "polly-stmt-granularity",
110 cl::desc(
111 "Algorithm to use for splitting basic blocks into multiple statements"),
112 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
113 "One statement per basic block"),
114 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
115 "Scalar independence heuristic"),
116 clEnumValN(GranularityChoice::Stores, "store",
117 "Store-level granularity")),
118 cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
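// Illustrative example (assumed input, not part of the original source):
// consider a basic block containing two stores:
//
//   %v0 = load double, double* %a
//   store double %v0, double* %b
//   %v1 = fadd double %v0, 1.0
//   store double %v1, double* %c
//
// With "bb" granularity the whole block forms a single statement. With
// "store" granularity the block is split after each store, so the load and
// the first store form one statement and the fadd and the second store form
// another (see buildSequentialBlockStmts below). With "scalar-indep" the
// split points are computed by the scalar independence heuristic in
// buildEqivClassBlockStmts.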
120 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
121 Region *NonAffineSubRegion,
122 bool IsExitBlock) {
123 // PHI nodes that are in the exit block of the region (i.e., if IsExitBlock is
124 // true) are not modeled as ordinary PHI nodes, as they are not part of the
125 // region. However, we model their operands in the predecessor blocks that are
126 // part of the region as regular scalar accesses.
128 // If we can synthesize a PHI we can skip it, but only if it is inside
129 // the region. If it is not, it can only be in the exit block of the region;
130 // in this case we model the operands but not the PHI itself.
131 auto *Scope = LI.getLoopFor(PHI->getParent());
132 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
133 return;
135 // PHI nodes are modeled as if they had been demoted prior to the SCoP
136 // detection. Hence, the PHI is a load of a new memory location in which the
137 // incoming value was written at the end of the incoming basic block.
138 bool OnlyNonAffineSubRegionOperands = true;
139 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
140 Value *Op = PHI->getIncomingValue(u);
141 BasicBlock *OpBB = PHI->getIncomingBlock(u);
142 ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);
144 // Do not build PHI dependences inside a non-affine subregion, but make
145 // sure that the necessary scalar values are still made available.
146 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
147 auto *OpInst = dyn_cast<Instruction>(Op);
148 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
149 ensureValueRead(Op, OpStmt);
150 continue;
153 OnlyNonAffineSubRegionOperands = false;
154 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
157 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
158 addPHIReadAccess(PHIStmt, PHI);
162 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
163 Instruction *Inst) {
164 assert(!isa<PHINode>(Inst));
166 // Pull-in required operands.
167 for (Use &Op : Inst->operands())
168 ensureValueRead(Op.get(), UserStmt);
171 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
172 // Check for uses of this instruction outside the scop. Because we do not
173 // iterate over such instructions and therefore did not "ensure" the existence
174 // of a write, we must detect such uses here.
175 if (scop->isEscaping(Inst))
176 ensureValueWrite(Inst);
179 /// Check that a value is a Fortran Array descriptor.
181 /// We check if V has the following structure:
182 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
183 /// [<num> x %struct.descriptor_dimension] }
186 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
188 /// 1. V's type name starts with "struct.array".
189 /// 2. V's type has the layout shown above.
190 /// 3. The final member of V's type is named "struct.descriptor_dimension".
191 /// 4. "struct.descriptor_dimension" has the layout shown above.
192 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
194 /// We are interested in such types since this is the code that dragonegg
195 /// generates for Fortran array descriptors.
197 /// @param V the Value to be checked.
199 /// @returns True if V is a Fortran array descriptor, False otherwise.
200 bool isFortranArrayDescriptor(Value *V) {
201 PointerType *PTy = dyn_cast<PointerType>(V->getType());
203 if (!PTy)
204 return false;
206 Type *Ty = PTy->getElementType();
207 assert(Ty && "Ty expected to be initialized");
208 auto *StructArrTy = dyn_cast<StructType>(Ty);
210 if (!(StructArrTy && StructArrTy->hasName()))
211 return false;
213 if (!StructArrTy->getName().startswith("struct.array"))
214 return false;
216 if (StructArrTy->getNumElements() != 4)
217 return false;
219 const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
221 // i8* match
222 if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
223 return false;
225 // Get a reference to the int type and check that all the members
226 // share the same int type
227 Type *IntTy = ArrMemberTys[1];
228 if (ArrMemberTys[2] != IntTy)
229 return false;
231 // type: [<num> x %struct.descriptor_dimension]
232 ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
233 if (!DescriptorDimArrayTy)
234 return false;
236 // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
237 StructType *DescriptorDimTy =
238 dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
240 if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
241 return false;
243 if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
244 return false;
246 if (DescriptorDimTy->getNumElements() != 3)
247 return false;
249 for (auto MemberTy : DescriptorDimTy->elements()) {
250 if (MemberTy != IntTy)
251 return false;
254 return true;
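// For illustration (assuming i<zz> is i64 and a rank-1 array, not taken from
// the original source), a type that satisfies the checks above would be:
//
//   %struct.descriptor_dimension = type { i64, i64, i64 }
//   %"struct.array1_real(kind=8)" =
//       type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }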
257 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
258 // match: 4.1 & 4.2 store/load
259 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
260 return nullptr;
262 // match: 4
263 if (Inst.getAlignment() != 8)
264 return nullptr;
266 Value *Address = Inst.getPointerOperand();
268 const BitCastInst *Bitcast = nullptr;
269 // [match: 3]
270 if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
271 Value *TypedMem = Slot->getPointerOperand();
272 // match: 2
273 Bitcast = dyn_cast<BitCastInst>(TypedMem);
274 } else {
275 // match: 2
276 Bitcast = dyn_cast<BitCastInst>(Address);
279 if (!Bitcast)
280 return nullptr;
282 auto *MallocMem = Bitcast->getOperand(0);
284 // match: 1
285 auto *MallocCall = dyn_cast<CallInst>(MallocMem);
286 if (!MallocCall)
287 return nullptr;
289 Function *MallocFn = MallocCall->getCalledFunction();
290 if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
291 return nullptr;
293 // Find all uses of the malloc'd memory.
294 // We are looking for a "store" into a struct whose type is the Fortran
295 // descriptor type.
296 for (auto user : MallocMem->users()) {
297 /// match: 5
298 auto *MallocStore = dyn_cast<StoreInst>(user);
299 if (!MallocStore)
300 continue;
302 auto *DescriptorGEP =
303 dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
304 if (!DescriptorGEP)
305 continue;
307 // match: 5
308 auto DescriptorType =
309 dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
310 if (!(DescriptorType && DescriptorType->hasName()))
311 continue;
313 Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
315 if (!Descriptor)
316 continue;
318 if (!isFortranArrayDescriptor(Descriptor))
319 continue;
321 return Descriptor;
324 return nullptr;
327 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
328 // match: 3
329 if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
330 return nullptr;
332 Value *Slot = Inst.getPointerOperand();
334 LoadInst *MemLoad = nullptr;
335 // [match: 2]
336 if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
337 // match: 1
338 MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
339 } else {
340 // match: 1
341 MemLoad = dyn_cast<LoadInst>(Slot);
344 if (!MemLoad)
345 return nullptr;
347 auto *BitcastOperator =
348 dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
349 if (!BitcastOperator)
350 return nullptr;
352 Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
353 if (!Descriptor)
354 return nullptr;
356 if (!isFortranArrayDescriptor(Descriptor))
357 return nullptr;
359 return Descriptor;
362 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
363 Value *Val = Inst.getValueOperand();
364 Type *ElementType = Val->getType();
365 Value *Address = Inst.getPointerOperand();
366 const SCEV *AccessFunction =
367 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
368 const SCEVUnknown *BasePointer =
369 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
370 enum MemoryAccess::AccessType AccType =
371 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
373 if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
374 auto *Src = BitCast->getOperand(0);
375 auto *SrcTy = Src->getType();
376 auto *DstTy = BitCast->getType();
377 // Do not try to delinearize non-sized (opaque) pointers.
378 if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
379 (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
380 return false;
382 if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
383 DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
384 DL.getTypeAllocSize(DstTy->getPointerElementType()))
385 Address = Src;
388 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
389 if (!GEP)
390 return false;
392 std::vector<const SCEV *> Subscripts;
393 std::vector<int> Sizes;
394 std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
395 auto *BasePtr = GEP->getOperand(0);
397 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
398 BasePtr = BasePtrCast->getOperand(0);
400 // Check for identical base pointers to ensure that we do not miss index
401 // offsets that have been added before this GEP is applied.
402 if (BasePtr != BasePointer->getValue())
403 return false;
405 std::vector<const SCEV *> SizesSCEV;
407 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
409 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
410 for (auto *Subscript : Subscripts) {
411 InvariantLoadsSetTy AccessILS;
412 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
413 &AccessILS))
414 return false;
416 for (LoadInst *LInst : AccessILS)
417 if (!ScopRIL.count(LInst))
418 return false;
421 if (Sizes.empty())
422 return false;
424 SizesSCEV.push_back(nullptr);
426 for (auto V : Sizes)
427 SizesSCEV.push_back(SE.getSCEV(
428 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
430 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
431 true, Subscripts, SizesSCEV, Val);
432 return true;
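// Illustrative example (assumed input, not from the original source): for a
// global "float A[100][200]" accessed as A[i][j] via
//
//   getelementptr inbounds [100 x [200 x float]], [100 x [200 x float]]* @A,
//                          i64 0, i64 %i, i64 %j
//
// the GEP provides both the subscript expressions and the constant dimension
// sizes, so the access can be modeled as a genuine two-dimensional access
// rather than the flattened form A[200 * i + j].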
435 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
436 if (!PollyDelinearize)
437 return false;
439 Value *Address = Inst.getPointerOperand();
440 Value *Val = Inst.getValueOperand();
441 Type *ElementType = Val->getType();
442 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
443 enum MemoryAccess::AccessType AccType =
444 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
446 const SCEV *AccessFunction =
447 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
448 const SCEVUnknown *BasePointer =
449 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
451 assert(BasePointer && "Could not find base pointer");
453 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
454 auto AccItr = InsnToMemAcc.find(Inst);
455 if (AccItr == InsnToMemAcc.end())
456 return false;
458 std::vector<const SCEV *> Sizes = {nullptr};
460 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
461 AccItr->second.Shape->DelinearizedSizes.end());
463 // In case only the element size is contained in the 'Sizes' array, the
464 // access does not access a real multi-dimensional array. Hence, we allow
465 // the normal single-dimensional access construction to handle this.
466 if (Sizes.size() == 1)
467 return false;
469 // Remove the element size. This information is already provided by the
470 // ElementSize parameter. In case the element size of this access and the
471 // element size used for delinearization differ, the delinearization is
472 // incorrect. Hence, we invalidate the scop.
474 // TODO: Handle delinearization with differing element sizes.
475 auto DelinearizedSize =
476 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
477 Sizes.pop_back();
478 if (ElementSize != DelinearizedSize)
479 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
481 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
482 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
483 return true;
486 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
487 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
489 if (MemIntr == nullptr)
490 return false;
492 auto *L = LI.getLoopFor(Inst->getParent());
493 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
494 assert(LengthVal);
496 // Check if the length value is actually affine or if we overapproximate it.
497 InvariantLoadsSetTy AccessILS;
498 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
500 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
501 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
502 LengthVal, SE, &AccessILS);
503 for (LoadInst *LInst : AccessILS)
504 if (!ScopRIL.count(LInst))
505 LengthIsAffine = false;
506 if (!LengthIsAffine)
507 LengthVal = nullptr;
509 auto *DestPtrVal = MemIntr->getDest();
510 assert(DestPtrVal);
512 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
513 assert(DestAccFunc);
514 // Ignore accesses to "NULL".
515 // TODO: We could use this to optimize the region further, e.g., intersect
516 // the context with
517 // isl_set_complement(isl_set_params(getDomain()))
518 // as we know it would be undefined to execute this instruction anyway.
519 if (DestAccFunc->isZero())
520 return true;
522 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
523 assert(DestPtrSCEV);
524 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
525 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
526 IntegerType::getInt8Ty(DestPtrVal->getContext()),
527 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
528 Inst.getValueOperand());
530 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
531 if (!MemTrans)
532 return true;
534 auto *SrcPtrVal = MemTrans->getSource();
535 assert(SrcPtrVal);
537 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
538 assert(SrcAccFunc);
539 // Ignore accesses to "NULL".
540 // TODO: See above TODO
541 if (SrcAccFunc->isZero())
542 return true;
544 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
545 assert(SrcPtrSCEV);
546 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
547 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
548 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
549 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
550 Inst.getValueOperand());
552 return true;
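// Illustrative example (not from the original source): an llvm.memset of %n
// bytes to %dst is modeled above as a MUST_WRITE of i8 elements starting at
// the base pointer of %dst, and an llvm.memcpy additionally gets a READ of
// the corresponding byte range of its source. If the length is not affine,
// it is dropped (LengthVal becomes nullptr) and the access is
// over-approximated.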
555 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
556 auto *CI = dyn_cast_or_null<CallInst>(Inst);
558 if (CI == nullptr)
559 return false;
561 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
562 return true;
564 bool ReadOnly = false;
565 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
566 auto *CalledFunction = CI->getCalledFunction();
567 switch (AA.getModRefBehavior(CalledFunction)) {
568 case FMRB_UnknownModRefBehavior:
569 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
570 case FMRB_DoesNotAccessMemory:
571 return true;
572 case FMRB_DoesNotReadMemory:
573 case FMRB_OnlyAccessesInaccessibleMem:
574 case FMRB_OnlyAccessesInaccessibleOrArgMem:
575 return false;
576 case FMRB_OnlyReadsMemory:
577 GlobalReads.emplace_back(Stmt, CI);
578 return true;
579 case FMRB_OnlyReadsArgumentPointees:
580 ReadOnly = true;
581 // Fall through
582 case FMRB_OnlyAccessesArgumentPointees: {
583 auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
584 Loop *L = LI.getLoopFor(Inst->getParent());
585 for (const auto &Arg : CI->arg_operands()) {
586 if (!Arg->getType()->isPointerTy())
587 continue;
589 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
590 if (ArgSCEV->isZero())
591 continue;
593 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
594 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
595 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
597 return true;
601 return true;
604 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
605 Value *Address = Inst.getPointerOperand();
606 Value *Val = Inst.getValueOperand();
607 Type *ElementType = Val->getType();
608 enum MemoryAccess::AccessType AccType =
609 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
611 const SCEV *AccessFunction =
612 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
613 const SCEVUnknown *BasePointer =
614 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
616 assert(BasePointer && "Could not find base pointer");
617 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
619 // Check if the access depends on a loop contained in a non-affine subregion.
620 bool isVariantInNonAffineLoop = false;
621 SetVector<const Loop *> Loops;
622 findLoops(AccessFunction, Loops);
623 for (const Loop *L : Loops)
624 if (Stmt->contains(L)) {
625 isVariantInNonAffineLoop = true;
626 break;
629 InvariantLoadsSetTy AccessILS;
631 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
632 bool IsAffine = !isVariantInNonAffineLoop &&
633 isAffineExpr(&scop->getRegion(), SurroundingLoop,
634 AccessFunction, SE, &AccessILS);
636 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
637 for (LoadInst *LInst : AccessILS)
638 if (!ScopRIL.count(LInst))
639 IsAffine = false;
641 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
642 AccType = MemoryAccess::MAY_WRITE;
644 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
645 IsAffine, {AccessFunction}, {nullptr}, Val);
648 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
649 if (buildAccessMemIntrinsic(Inst, Stmt))
650 return;
652 if (buildAccessCallInst(Inst, Stmt))
653 return;
655 if (buildAccessMultiDimFixed(Inst, Stmt))
656 return;
658 if (buildAccessMultiDimParam(Inst, Stmt))
659 return;
661 buildAccessSingleDim(Inst, Stmt);
664 void ScopBuilder::buildAccessFunctions() {
665 for (auto &Stmt : *scop) {
666 if (Stmt.isBlockStmt()) {
667 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
668 continue;
671 Region *R = Stmt.getRegion();
672 for (BasicBlock *BB : R->blocks())
673 buildAccessFunctions(&Stmt, *BB, R);
676 // Build write accesses for values that are used after the SCoP.
677 // The instructions defining them might be synthesizable and therefore not
678 // contained in any statement, hence we iterate over the original instructions
679 // to identify all escaping values.
680 for (BasicBlock *BB : scop->getRegion().blocks()) {
681 for (Instruction &Inst : *BB)
682 buildEscapingDependences(&Inst);
686 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
687 return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
688 !canSynthesize(Inst, *scop, &SE, L);
691 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
692 Loop *SurroundingLoop = LI.getLoopFor(BB);
694 int Count = 0;
695 std::vector<Instruction *> Instructions;
696 for (Instruction &Inst : *BB) {
697 if (shouldModelInst(&Inst, SurroundingLoop))
698 Instructions.push_back(&Inst);
699 if (Inst.getMetadata("polly_split_after") ||
700 (SplitOnStore && isa<StoreInst>(Inst))) {
701 scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
702 Count++;
703 Instructions.clear();
707 scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
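// Illustrative example (assumed input, not from the original source): with
// SplitOnStore enabled, a block
//
//   %v = load i32, i32* %a
//   store i32 %v, i32* %b
//   %w = add i32 %v, 1
//   store i32 %w, i32* %c
//
// is split after each store: one statement contains the load and the first
// store, another contains the add and the second store, and a trailing
// statement holds anything after the last store. An explicit
// "polly_split_after" metadata node on an instruction forces a split at that
// point as well.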
710 /// Is @p Inst an ordered instruction?
712 /// An unordered instruction is an instruction such that a sequence of
713 /// unordered instructions can be permuted without changing semantics. Any
714 /// instruction for which this is not always the case is ordered.
715 static bool isOrderedInstruction(Instruction *Inst) {
716 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
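// For example, loads, stores, and calls that may access memory are ordered,
// while pure arithmetic such as an add is unordered and can be placed into a
// statement independently of its textual position in the block.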
719 /// Join instructions into the same statement if one uses the scalar result of
720 /// the other.
721 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
722 ArrayRef<Instruction *> ModeledInsts) {
723 for (Instruction *Inst : ModeledInsts) {
724 if (isa<PHINode>(Inst))
725 continue;
727 for (Use &Op : Inst->operands()) {
728 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
729 if (!OpInst)
730 continue;
732 // Check if OpInst is in the BB and is a modeled instruction.
733 auto OpVal = UnionFind.findValue(OpInst);
734 if (OpVal == UnionFind.end())
735 continue;
737 UnionFind.unionSets(Inst, OpInst);
742 /// Join instructions that are used as incoming values in successor PHIs into
743 /// the epilogue.
744 static void
745 joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
746 ArrayRef<Instruction *> ModeledInsts,
747 BasicBlock *BB) {
748 for (BasicBlock *Succ : successors(BB)) {
749 for (Instruction &SuccInst : *Succ) {
750 PHINode *SuccPHI = dyn_cast<PHINode>(&SuccInst);
751 if (!SuccPHI)
752 break;
754 Value *IncomingVal = SuccPHI->getIncomingValueForBlock(BB);
755 Instruction *IncomingInst = dyn_cast<Instruction>(IncomingVal);
756 if (!IncomingInst)
757 continue;
758 if (IncomingInst->getParent() != BB)
759 continue;
760 if (UnionFind.findValue(IncomingInst) == UnionFind.end())
761 continue;
763 UnionFind.unionSets(nullptr, IncomingInst);
768 /// Ensure that the order of ordered instructions does not change.
770 /// If we encounter an ordered instruction enclosed by instructions belonging to
771 /// a different statement (which may itself contain ordered instructions, but
772 /// this is not tested here), join them.
773 static void
774 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
775 ArrayRef<Instruction *> ModeledInsts) {
776 SetVector<Instruction *> SeenLeaders;
777 for (Instruction *Inst : ModeledInsts) {
778 if (!isOrderedInstruction(Inst))
779 continue;
781 Instruction *Leader = UnionFind.getLeaderValue(Inst);
782 bool Inserted = SeenLeaders.insert(Leader);
783 if (Inserted)
784 continue;
786 // Merge statements to close holes. Say we have already seen statements A
787 // and B, in this order. Then we see an instruction of A again, giving the
788 // pattern "A B A". The loop below joins every statement seen since the first
789 // occurrence of A (here B) into A, so that only one occurrence of A remains.
790 for (Instruction *Prev : reverse(SeenLeaders)) {
791 // Items added to 'SeenLeaders' are leaders, but may have lost their
792 // leadership status when merged into another statement.
793 Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
794 if (PrevLeader == Leader)
795 break;
796 UnionFind.unionSets(Prev, Leader);
801 /// Also ensure that the epilogue is the last statement relative to all ordered
802 /// instructions.
804 /// This is basically joinOrderedInstructions(), but using the epilogue as
805 /// an 'ordered instruction'.
806 static void joinAllAfterEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
807 ArrayRef<Instruction *> ModeledInsts) {
808 bool EpilogueSeen = false;
809 for (Instruction *Inst : ModeledInsts) {
810 auto PHIWritesLeader = UnionFind.findLeader(nullptr);
811 auto InstLeader = UnionFind.findLeader(Inst);
813 if (PHIWritesLeader == InstLeader)
814 EpilogueSeen = true;
816 if (!isOrderedInstruction(Inst))
817 continue;
819 if (EpilogueSeen)
820 UnionFind.unionSets(PHIWritesLeader, InstLeader);
824 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
825 Loop *L = LI.getLoopFor(BB);
827 // Extracting out modeled instructions saves us from checking
828 // shouldModelInst() repeatedly.
829 SmallVector<Instruction *, 32> ModeledInsts;
830 EquivalenceClasses<Instruction *> UnionFind;
831 for (Instruction &Inst : *BB) {
832 if (!shouldModelInst(&Inst, L))
833 continue;
834 ModeledInsts.push_back(&Inst);
835 UnionFind.insert(&Inst);
838 // 'nullptr' represents the last statement for a basic block. It contains no
839 // instructions, but holds the PHI write accesses for successor basic blocks.
840 // If a PHI has an incoming value defined in this BB, it can also be merged
841 // with other statements.
842 // TODO: We wouldn't need this if we would add PHIWrites into the statement
843 // that defines the incoming value (if in the BB) instead of always the last,
844 // so we could unconditionally always add a last statement.
845 UnionFind.insert(nullptr);
847 joinOperandTree(UnionFind, ModeledInsts);
848 joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB);
849 joinOrderedInstructions(UnionFind, ModeledInsts);
850 joinAllAfterEpilogue(UnionFind, ModeledInsts);
852 // The list of instructions for each statement (a statement is represented by
853 // its leader instruction). The order of statements and of their instructions
854 // is reversed such that the epilogue comes first. This makes it easier to
855 // ensure that the epilogue is the last statement.
856 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
858 // Ensure that the epilogue is last.
859 LeaderToInstList[nullptr];
861 // Collect the instructions of all leaders. UnionFind's member iterators
862 // are unfortunately not in any specific order.
863 for (Instruction &Inst : reverse(*BB)) {
864 auto LeaderIt = UnionFind.findLeader(&Inst);
865 if (LeaderIt == UnionFind.member_end())
866 continue;
868 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
869 InstList.push_back(&Inst);
872 // Finally build the statements.
873 int Count = 0;
874 for (auto &Instructions : reverse(LeaderToInstList)) {
875 std::vector<Instruction *> &InstList = Instructions.second;
876 std::reverse(InstList.begin(), InstList.end());
877 scop->addScopStmt(BB, L, std::move(InstList), Count);
878 Count += 1;
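// Illustrative example (assumed input, not from the original source): in a
// block such as
//
//   %x = load i32, i32* %a
//   store i32 %x, i32* %p
//   %y = load i32, i32* %b
//   store i32 %y, i32* %q
//
// the two load/store pairs do not share any scalar operands, so the
// union-find above keeps them in separate equivalence classes and the block
// is split into two statements (plus the trailing statement that holds PHI
// writes for successors). joinOrderedInstructions would only merge them if
// their memory operations were interleaved.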
882 void ScopBuilder::buildStmts(Region &SR) {
883 if (scop->isNonAffineSubRegion(&SR)) {
884 std::vector<Instruction *> Instructions;
885 Loop *SurroundingLoop =
886 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
887 for (Instruction &Inst : *SR.getEntry())
888 if (shouldModelInst(&Inst, SurroundingLoop))
889 Instructions.push_back(&Inst);
890 scop->addScopStmt(&SR, SurroundingLoop, Instructions);
891 return;
894 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
895 if (I->isSubRegion())
896 buildStmts(*I->getNodeAs<Region>());
897 else {
898 BasicBlock *BB = I->getNodeAs<BasicBlock>();
899 switch (StmtGranularity) {
900 case GranularityChoice::BasicBlocks:
901 buildSequentialBlockStmts(BB);
902 break;
903 case GranularityChoice::ScalarIndependence:
904 buildEqivClassBlockStmts(BB);
905 break;
906 case GranularityChoice::Stores:
907 buildSequentialBlockStmts(BB, true);
908 break;
913 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
914 Region *NonAffineSubRegion) {
915 assert(
916 Stmt &&
917 "The exit BB is the only one that cannot be represented by a statement");
918 assert(Stmt->represents(&BB));
920 // We do not build access functions for error blocks, as they may contain
921 // instructions we cannot model.
922 if (isErrorBlock(BB, scop->getRegion(), LI, DT))
923 return;
925 auto BuildAccessesForInst = [this, Stmt,
926 NonAffineSubRegion](Instruction *Inst) {
927 PHINode *PHI = dyn_cast<PHINode>(Inst);
928 if (PHI)
929 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
931 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
932 assert(Stmt && "Cannot build access function in non-existing statement");
933 buildMemoryAccess(MemInst, Stmt);
936 // PHI nodes have already been modeled above and TerminatorInsts that are
937 // not part of a non-affine subregion are fully modeled and regenerated
938 // from the polyhedral domains. Hence, they do not need to be modeled as
939 // explicit data dependences.
940 if (!PHI)
941 buildScalarDependences(Stmt, Inst);
944 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
945 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
946 if (IsEntryBlock) {
947 for (Instruction *Inst : Stmt->getInstructions())
948 BuildAccessesForInst(Inst);
949 if (Stmt->isRegionStmt())
950 BuildAccessesForInst(BB.getTerminator());
951 } else {
952 for (Instruction &Inst : BB) {
953 if (isIgnoredIntrinsic(&Inst))
954 continue;
956 // Invariant loads have already been processed.
957 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
958 continue;
960 BuildAccessesForInst(&Inst);
965 MemoryAccess *ScopBuilder::addMemoryAccess(
966 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
967 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
968 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
969 MemoryKind Kind) {
970 bool isKnownMustAccess = false;
972 // Accesses in single-basic block statements are always executed.
973 if (Stmt->isBlockStmt())
974 isKnownMustAccess = true;
976 if (Stmt->isRegionStmt()) {
977 // Accesses that dominate the exit block of a non-affine region are always
978 // executed. In non-affine regions there may exist MemoryKind::Values that
979 // do not dominate the exit. MemoryKind::Values will always dominate the
980 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
981 // non-affine region.
982 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
983 isKnownMustAccess = true;
986 // Non-affine PHI writes do not "happen" at a particular instruction, but
987 // after exiting the statement. Therefore they are guaranteed to execute and
988 // overwrite the old value.
989 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
990 isKnownMustAccess = true;
992 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
993 AccType = MemoryAccess::MAY_WRITE;
995 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
996 Affine, Subscripts, Sizes, AccessValue, Kind);
998 scop->addAccessFunction(Access);
999 Stmt->addAccess(Access);
1000 return Access;
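// Illustrative example (not from the original source): in a non-affine region
// statement modeling "if (c) A[0] = x;", the store's block does not dominate
// the region's exit, so its access is downgraded from MUST_WRITE to MAY_WRITE
// here, whereas a store in the region's entry block keeps its MUST_WRITE
// classification.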
1003 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
1004 MemoryAccess::AccessType AccType,
1005 Value *BaseAddress, Type *ElementType,
1006 bool IsAffine,
1007 ArrayRef<const SCEV *> Subscripts,
1008 ArrayRef<const SCEV *> Sizes,
1009 Value *AccessValue) {
1010 ArrayBasePointers.insert(BaseAddress);
1011 auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
1012 ElementType, IsAffine, AccessValue,
1013 Subscripts, Sizes, MemoryKind::Array);
1015 if (!DetectFortranArrays)
1016 return;
1018 if (Value *FAD = findFADAllocationInvisible(MemAccInst))
1019 MemAccess->setFortranArrayDescriptor(FAD);
1020 else if (Value *FAD = findFADAllocationVisible(MemAccInst))
1021 MemAccess->setFortranArrayDescriptor(FAD);
1024 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
1025 // Find the statement that defines the value of Inst. That statement has to
1026 // write the value to make it available to those statements that read it.
1027 ScopStmt *Stmt = scop->getStmtFor(Inst);
1029 // It is possible that the value is synthesizable within a loop (such that it
1030 // is not part of any statement), but not after the loop (where you need the
1031 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1032 // avoid this. In case the IR has no such PHI, use the last statement (where
1033 // the value is synthesizable) to write the value.
1034 if (!Stmt)
1035 Stmt = scop->getLastStmtFor(Inst->getParent());
1037 // Inst not defined within this SCoP.
1038 if (!Stmt)
1039 return;
1041 // Do not process further if the instruction is already written.
1042 if (Stmt->lookupValueWriteOf(Inst))
1043 return;
1045 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
1046 true, Inst, ArrayRef<const SCEV *>(),
1047 ArrayRef<const SCEV *>(), MemoryKind::Value);
1050 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
1051 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1052 // to be able to replace this one. Currently, there is a split responsibility.
1053 // In a first step, the MemoryAccess is created, but without the
1054 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1055 // AccessRelation is created. At least for scalar accesses, there is no new
1056 // information available at ScopStmt::buildAccessRelations(), so we could
1057 // create the AccessRelation right away. This is what
1058 // ScopStmt::ensureValueRead(Value*) does.
1060 auto *Scope = UserStmt->getSurroundingLoop();
1061 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
1062 switch (VUse.getKind()) {
1063 case VirtualUse::Constant:
1064 case VirtualUse::Block:
1065 case VirtualUse::Synthesizable:
1066 case VirtualUse::Hoisted:
1067 case VirtualUse::Intra:
1068 // Uses of these kinds do not need a MemoryAccess.
1069 break;
1071 case VirtualUse::ReadOnly:
1072 // Add MemoryAccess for invariant values only if requested.
1073 if (!ModelReadOnlyScalars)
1074 break;
1076 LLVM_FALLTHROUGH;
1077 case VirtualUse::Inter:
1079 // Do not create another MemoryAccess for reloading the value if one already
1080 // exists.
1081 if (UserStmt->lookupValueReadOf(V))
1082 break;
1084 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
1085 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1086 MemoryKind::Value);
1088 // Inter-statement uses need to write the value in their defining statement.
1089 if (VUse.isInter())
1090 ensureValueWrite(cast<Instruction>(V));
1091 break;
1095 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
1096 BasicBlock *IncomingBlock,
1097 Value *IncomingValue, bool IsExitBlock) {
1098 // As the incoming block might turn out to be an error statement, ensure we
1099 // will create an exit PHI SAI object. It is needed during code generation
1100 // and would be created later anyway.
1101 if (IsExitBlock)
1102 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
1103 MemoryKind::ExitPHI);
1105 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1106 // from outside the SCoP's region have no statement representation.
1107 if (!IncomingStmt)
1108 return;
1110 // Make sure the incoming value is available in the incoming block.
1111 // This must be done before the check for multiple PHI writes because each of
1112 // multiple exiting edges from a subregion can carry the effective written
1113 // value of the subregion. As such, all of them must be made available in the
1114 // subregion statement.
1115 ensureValueRead(IncomingValue, IncomingStmt);
1117 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1118 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
1119 assert(Acc->getAccessInstruction() == PHI);
1120 Acc->addIncoming(IncomingBlock, IncomingValue);
1121 return;
1124 MemoryAccess *Acc = addMemoryAccess(
1125 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
1126 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1127 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
1128 assert(Acc);
1129 Acc->addIncoming(IncomingBlock, IncomingValue);
1132 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
1133 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
1134 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1135 MemoryKind::PHI);
1138 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
1139 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
1141 Stmt.Domain = scop->getDomainConditions(&Stmt);
1142 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
1145 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
1146 isl::set Domain = Stmt.getDomain();
1147 for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1148 isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1149 Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
1153 /// Return the reduction type for a given binary operator.
1154 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
1155 const Instruction *Load) {
1156 if (!BinOp)
1157 return MemoryAccess::RT_NONE;
1158 switch (BinOp->getOpcode()) {
1159 case Instruction::FAdd:
1160 if (!BinOp->isFast())
1161 return MemoryAccess::RT_NONE;
1162 // Fall through
1163 case Instruction::Add:
1164 return MemoryAccess::RT_ADD;
1165 case Instruction::Or:
1166 return MemoryAccess::RT_BOR;
1167 case Instruction::Xor:
1168 return MemoryAccess::RT_BXOR;
1169 case Instruction::And:
1170 return MemoryAccess::RT_BAND;
1171 case Instruction::FMul:
1172 if (!BinOp->isFast())
1173 return MemoryAccess::RT_NONE;
1174 // Fall through
1175 case Instruction::Mul:
1176 if (DisableMultiplicativeReductions)
1177 return MemoryAccess::RT_NONE;
1178 return MemoryAccess::RT_MUL;
1179 default:
1180 return MemoryAccess::RT_NONE;
1184 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1185 SmallVector<MemoryAccess *, 2> Loads;
1186 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1188 // First collect candidate load-store reduction chains by iterating over all
1189 // stores and collecting possible reduction loads.
1190 for (MemoryAccess *StoreMA : Stmt) {
1191 if (StoreMA->isRead())
1192 continue;
1194 Loads.clear();
1195 collectCandidateReductionLoads(StoreMA, Loads);
1196 for (MemoryAccess *LoadMA : Loads)
1197 Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1200 // Then check each possible candidate pair.
1201 for (const auto &CandidatePair : Candidates) {
1202 bool Valid = true;
1203 isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1204 isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1206 // Skip those with obviously unequal base addresses.
1207 if (!LoadAccs.has_equal_space(StoreAccs)) {
1208 continue;
1211 // And check whether the candidate pair overlaps with any other memory access.
1212 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1213 AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1214 isl::set AllAccs = AllAccsRel.range();
1216 for (MemoryAccess *MA : Stmt) {
1217 if (MA == CandidatePair.first || MA == CandidatePair.second)
1218 continue;
1220 isl::map AccRel =
1221 MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1222 isl::set Accs = AccRel.range();
1224 if (AllAccs.has_equal_space(Accs)) {
1225 isl::set OverlapAccs = Accs.intersect(AllAccs);
1226 Valid = Valid && OverlapAccs.is_empty();
1230 if (!Valid)
1231 continue;
1233 const LoadInst *Load =
1234 dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1235 MemoryAccess::ReductionType RT =
1236 getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1238 // If no overlapping access was found, we mark the load and store as
1239 // reduction-like.
1240 CandidatePair.first->markAsReductionLike(RT);
1241 CandidatePair.second->markAsReductionLike(RT);
1245 void ScopBuilder::collectCandidateReductionLoads(
1246 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1247 ScopStmt *Stmt = StoreMA->getStatement();
1249 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1250 if (!Store)
1251 return;
1253 // Skip if there is not one binary operator between the load and the store
1254 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1255 if (!BinOp)
1256 return;
1258 // Skip if the binary operator has multiple uses.
1259 if (BinOp->getNumUses() != 1)
1260 return;
1262 // Skip if the opcode of the binary operator is not commutative/associative
1263 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1264 return;
1266 // Skip if the binary operator is outside the current SCoP
1267 if (BinOp->getParent() != Store->getParent())
1268 return;
1270 // Skip if it is a multiplicative reduction and we disabled them
1271 if (DisableMultiplicativeReductions &&
1272 (BinOp->getOpcode() == Instruction::Mul ||
1273 BinOp->getOpcode() == Instruction::FMul))
1274 return;
1276 // Check the binary operator operands for a candidate load
1277 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1278 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1279 if (!PossibleLoad0 && !PossibleLoad1)
1280 return;
1282 // A load is only a candidate if it cannot escape (thus has only this use)
1283 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1284 if (PossibleLoad0->getParent() == Store->getParent())
1285 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1286 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1287 if (PossibleLoad1->getParent() == Store->getParent())
1288 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
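// Illustrative example (assumed input, not from the original source): the
// loop body of "A[i] = A[i] + B[i]" contains the chain
//
//   %a = load double, double* %pA
//   %b = load double, double* %pB
//   %s = fadd fast double %a, %b
//   store double %s, double* %pA
//
// Here %a is a candidate reduction load for the store: the fadd is its single
// user, it is commutative/associative (fast-math), and the load and store
// access the same array, so checkForReductions marks the pair as an RT_ADD
// reduction unless another access in the statement overlaps it.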
1291 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1292 for (MemoryAccess *Access : Stmt.MemAccs) {
1293 Type *ElementType = Access->getElementType();
1295 MemoryKind Ty;
1296 if (Access->isPHIKind())
1297 Ty = MemoryKind::PHI;
1298 else if (Access->isExitPHIKind())
1299 Ty = MemoryKind::ExitPHI;
1300 else if (Access->isValueKind())
1301 Ty = MemoryKind::Value;
1302 else
1303 Ty = MemoryKind::Array;
1305 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1306 ElementType, Access->Sizes, Ty);
1307 Access->buildAccessRelation(SAI);
1308 scop->addAccessData(Access);
1312 #ifndef NDEBUG
1313 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1314 auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1315 auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1316 assert(PhysUse.getKind() == VirtUse.getKind());
1319 /// Check the consistency of every statement's MemoryAccesses.
1321 /// The check is carried out by expecting the "physical" kind of use (derived
1322 /// from the BasicBlock the instruction resides in) to be the same as the
1323 /// "virtual" kind of use (derived from a statement's MemoryAccess).
1325 /// The "physical" uses are taken by ensureValueRead to determine whether to
1326 /// create MemoryAccesses. When done, the kind of scalar access should be the
1327 /// same no matter which way it was derived.
1329 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1330 /// can intentionally influence the kind of uses (no longer corresponding to the
1331 /// "physical" ones, hence called "virtual"). The CodeGenerator therefore has to
1332 /// pick up the virtual uses. But here in the ScopBuilder, this has not happened
1333 /// yet, such that virtual and physical uses are equivalent.
1334 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1335 for (auto *BB : S->getRegion().blocks()) {
1336 for (auto &Inst : *BB) {
1337 auto *Stmt = S->getStmtFor(&Inst);
1338 if (!Stmt)
1339 continue;
1341 if (isIgnoredIntrinsic(&Inst))
1342 continue;
1344 // Branch conditions are encoded in the statement domains.
1345 if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
1346 continue;
1348 // Verify all uses.
1349 for (auto &Op : Inst.operands())
1350 verifyUse(S, Op, LI);
1352 // Stores do not produce values used by other statements.
1353 if (isa<StoreInst>(Inst))
1354 continue;
1356 // For every value defined in the block, also check that a use of that
1357 // value in the same statement would not be an inter-statement use. It can
1358 // still be synthesizable or load-hoisted, but these kinds of instructions
1359 // are not directly copied during code generation.
1360 auto VirtDef =
1361 VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1362 assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1363 VirtDef.getKind() == VirtualUse::Intra ||
1364 VirtDef.getKind() == VirtualUse::Hoisted);
1368 if (S->hasSingleExitEdge())
1369 return;
1371 // PHINodes in the SCoP region's exit block are also uses to be checked.
1372 if (!S->getRegion().isTopLevelRegion()) {
1373 for (auto &Inst : *S->getRegion().getExit()) {
1374 if (!isa<PHINode>(Inst))
1375 break;
1377 for (auto &Op : Inst.operands())
1378 verifyUse(S, Op, LI);
1382 #endif
1384 /// Return the block that is the representing block for @p RN.
1385 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1386 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1387 : RN->getNodeAs<BasicBlock>();
1390 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1391 OptimizationRemarkEmitter &ORE) {
1392 scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1394 buildStmts(R);
1396 // Create all invariant load instructions first. These are categorized as
1397 // 'synthesizable' and therefore are not part of any ScopStmt, but they need
1398 // to be created somewhere.
1399 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1400 for (BasicBlock *BB : scop->getRegion().blocks()) {
1401 if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1402 continue;
1404 for (Instruction &Inst : *BB) {
1405 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1406 if (!Load)
1407 continue;
1409 if (!RIL.count(Load))
1410 continue;
1412 // Invariant loads require a MemoryAccess to be created in some statement.
1413 // It is not important to which statement the MemoryAccess is added
1414 // because it will later be removed from the ScopStmt again. We choose the
1415 // first statement of the basic block the LoadInst is in.
1416 ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1417 assert(!List.empty());
1418 ScopStmt *RILStmt = List.front();
1419 buildMemoryAccess(Load, RILStmt);
1422 buildAccessFunctions();
1424 // In case the region does not have an exiting block we will later (during
1425 // code generation) split the exit block. This will move potential PHI nodes
1426 // from the current exit block into the new region exiting block. Hence, PHI
1427 // nodes that are not part of the region at this point will become part of it.
1428 // To handle these PHI nodes later, we now model their operands as scalar
1429 // accesses. Note that we do not model anything in the exit block if we have
1430 // an exiting block in the region, as there will not be any splitting later.
1431 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1432 for (Instruction &Inst : *R.getExit()) {
1433 PHINode *PHI = dyn_cast<PHINode>(&Inst);
1434 if (!PHI)
1435 break;
1437 buildPHIAccesses(nullptr, PHI, nullptr, true);
1441 // Create memory accesses for global reads since all arrays are now known.
1442 auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1443 for (auto GlobalReadPair : GlobalReads) {
1444 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1445 Instruction *GlobalRead = GlobalReadPair.second;
1446 for (auto *BP : ArrayBasePointers)
1447 addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1448 BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1451 scop->buildInvariantEquivalenceClasses();
1453 /// A map from basic blocks to their invalid domains.
1454 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1456 if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1457 DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1458 return;
1461 scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1463 // Initialize the invalid domain.
1464 for (ScopStmt &Stmt : scop->Stmts)
1465 if (Stmt.isBlockStmt())
1466 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1467 else
1468 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1469 Stmt.getRegion()->getNode())]);
1471 // Remove empty statements.
1472 // Exit early in case there are no executable statements left in this scop.
1473 scop->removeStmtNotInDomainMap();
1474 scop->simplifySCoP(false);
1475 if (scop->isEmpty()) {
1476 DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1477 return;
1480 // The ScopStmts now have enough information to initialize themselves.
1481 for (ScopStmt &Stmt : *scop) {
1482 buildDomain(Stmt);
1483 collectSurroundingLoops(Stmt);
1484 buildAccessRelations(Stmt);
1486 if (DetectReductions)
1487 checkForReductions(Stmt);
1490 // Check early for a feasible runtime context.
1491 if (!scop->hasFeasibleRuntimeContext()) {
1492 DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1493 return;
1496 // Check early for profitability. Afterwards it cannot change anymore,
1497 // only the runtime context could become infeasible.
1498 if (!scop->isProfitable(UnprofitableScalarAccs)) {
1499 scop->invalidate(PROFITABLE, DebugLoc());
1500 DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1501 return;
1504 scop->buildSchedule(LI);
1506 scop->finalizeAccesses();
1508 scop->realignParams();
1509 scop->addUserContext();
1511 // After the context has been fully constructed, and thus all our knowledge
1512 // about the parameters is available, we add all recorded assumptions to the
1513 // assumed/invalid context.
1514 scop->addRecordedAssumptions();
1516 scop->simplifyContexts();
1517 if (!scop->buildAliasChecks(AA)) {
1518 DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1519 return;
1522 scop->hoistInvariantLoads();
1523 scop->canonicalizeDynamicBasePtrs();
1524 scop->verifyInvariantLoads();
1525 scop->simplifySCoP(true);
1527 // Check late for a feasible runtime context because profitability did not
1528 // change.
1529 if (!scop->hasFeasibleRuntimeContext()) {
1530 DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1531 return;
1534 #ifndef NDEBUG
1535 verifyUses(scop.get(), LI, DT);
1536 #endif
1539 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1540 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1541 ScopDetection &SD, ScalarEvolution &SE,
1542 OptimizationRemarkEmitter &ORE)
1543 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1544 DebugLoc Beg, End;
1545 auto P = getBBPairForRegion(R);
1546 getDebugLocations(P, Beg, End);
1548 std::string Msg = "SCoP begins here.";
1549 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1550 << Msg);
1552 buildScop(*R, AC, ORE);
1554 DEBUG(dbgs() << *scop);
1556 if (!scop->hasFeasibleRuntimeContext()) {
1557 InfeasibleScops++;
1558 Msg = "SCoP ends here but was dismissed.";
1559 DEBUG(dbgs() << "SCoP detected but dismissed\n");
1560 scop.reset();
1561 } else {
1562 Msg = "SCoP ends here.";
1563 ++ScopFound;
1564 if (scop->getMaxLoopDepth() > 0)
1565 ++RichScopFound;
1568 if (R->isTopLevelRegion())
1569 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1570 << Msg);
1571 else
1572 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1573 << Msg);