lib/Analysis/ScopBuilder.cpp

   1 //===- ScopBuilder.cpp ----------------------------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Create a polyhedral description for a static control flow region.
  11 //
  12 // The pass creates a polyhedral description of the Scops detected by the SCoP
  13 // detection derived from their LLVM-IR code.
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #include "polly/ScopBuilder.h"
  18 #include "polly/Options.h"
  19 #include "polly/ScopDetection.h"
  20 #include "polly/ScopDetectionDiagnostic.h"
  21 #include "polly/ScopInfo.h"
  22 #include "polly/Support/GICHelper.h"
  23 #include "polly/Support/SCEVValidator.h"
  24 #include "polly/Support/ScopHelper.h"
  25 #include "polly/Support/VirtualInstruction.h"
  26 #include "llvm/ADT/APInt.h"
  27 #include "llvm/ADT/ArrayRef.h"
  28 #include "llvm/ADT/DenseMap.h"
  29 #include "llvm/ADT/EquivalenceClasses.h"
  30 #include "llvm/ADT/SetVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/LoopInfo.h"
  34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  35 #include "llvm/Analysis/RegionInfo.h"
  36 #include "llvm/Analysis/RegionIterator.h"
  37 #include "llvm/Analysis/ScalarEvolution.h"
  38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  39 #include "llvm/IR/BasicBlock.h"
  40 #include "llvm/IR/Constants.h"
  41 #include "llvm/IR/DataLayout.h"
  42 #include "llvm/IR/DebugLoc.h"
  43 #include "llvm/IR/DerivedTypes.h"
  44 #include "llvm/IR/DiagnosticInfo.h"
  45 #include "llvm/IR/Dominators.h"
  46 #include "llvm/IR/Function.h"
  47 #include "llvm/IR/InstrTypes.h"
  48 #include "llvm/IR/Instruction.h"
  49 #include "llvm/IR/Instructions.h"
  50 #include "llvm/IR/IntrinsicInst.h"
  51 #include "llvm/IR/Operator.h"
  52 #include "llvm/IR/Type.h"
  53 #include "llvm/IR/Use.h"
  54 #include "llvm/IR/Value.h"
  55 #include "llvm/Support/Casting.h"
  56 #include "llvm/Support/CommandLine.h"
  57 #include "llvm/Support/Compiler.h"
  58 #include "llvm/Support/Debug.h"
  59 #include "llvm/Support/ErrorHandling.h"
  60 #include "llvm/Support/raw_ostream.h"
  61 #include <cassert>
  62 #include <string>
  63 #include <tuple>
  64 #include <vector>
  65
  66 using namespace llvm;
  67 using namespace polly;
  68
  69 #define DEBUG_TYPE "polly-scops"
  70
  71 STATISTIC(ScopFound, "Number of valid Scops");
  72 STATISTIC(RichScopFound, "Number of Scops containing a loop");
  73 STATISTIC(InfeasibleScops,
  74           "Number of SCoPs with statically infeasible context.");
  75
  76 bool polly::ModelReadOnlyScalars;
  77
  78 static cl::opt<bool, true> XModelReadOnlyScalars(
  79     "polly-analyze-read-only-scalars",
  80     cl::desc("Model read-only scalar values in the scop description"),
  81     cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
  82     cl::init(true), cl::cat(PollyCategory));
  83
  84 static cl::opt<bool> UnprofitableScalarAccs(
  85     "polly-unprofitable-scalar-accs",
  86     cl::desc("Count statements with scalar accesses as not optimizable"),
  87     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  88
  89 static cl::opt<bool> DetectFortranArrays(
  90     "polly-detect-fortran-arrays",
  91     cl::desc("Detect Fortran arrays and use this for code generation"),
  92     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  93
  94 static cl::opt<bool> DetectReductions("polly-detect-reductions",
  95                                       cl::desc("Detect and exploit reductions"),
  96                                       cl::Hidden, cl::ZeroOrMore,
  97                                       cl::init(true), cl::cat(PollyCategory));
  98
  99 // Multiplicative reductions can be disabled separately as these kind of
 100 // operations can overflow easily. Additive reductions and bit operations
 101 // are in contrast pretty stable.
 102 static cl::opt<bool> DisableMultiplicativeReductions(
 103     "polly-disable-multiplicative-reductions",
 104     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
 105     cl::init(false), cl::cat(PollyCategory));
 106
 107 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
 108
 109 static cl::opt<GranularityChoice> StmtGranularity(
 110     "polly-stmt-granularity",
 111     cl::desc(
 112         "Algorithm to use for splitting basic blocks into multiple statements"),
 113     cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
 114                           "One statement per basic block"),
 115                clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
 116                           "Scalar independence heuristic"),
 117                clEnumValN(GranularityChoice::Stores, "store",
 118                           "Store-level granularity")),
 119     cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
 120
 121 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
 122                                    Region *NonAffineSubRegion,
 123                                    bool IsExitBlock) {
 124   // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
 125   // true, are not modeled as ordinary PHI nodes as they are not part of the
 126   // region. However, we model the operands in the predecessor blocks that are
 127   // part of the region as regular scalar accesses.
 128
 129   // If we can synthesize a PHI we can skip it, however only if it is in
 130   // the region. If it is not it can only be in the exit block of the region.
 131   // In this case we model the operands but not the PHI itself.
 132   auto *Scope = LI.getLoopFor(PHI->getParent());
 133   if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
 134     return;
 135
 136   // PHI nodes are modeled as if they had been demoted prior to the SCoP
 137   // detection. Hence, the PHI is a load of a new memory location in which the
 138   // incoming value was written at the end of the incoming basic block.
 139   bool OnlyNonAffineSubRegionOperands = true;
 140   for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
 141     Value *Op = PHI->getIncomingValue(u);
 142     BasicBlock *OpBB = PHI->getIncomingBlock(u);
 143     ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);
 144
 145     // Do not build PHI dependences inside a non-affine subregion, but make
 146     // sure that the necessary scalar values are still made available.
 147     if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
 148       auto *OpInst = dyn_cast<Instruction>(Op);
 149       if (!OpInst || !NonAffineSubRegion->contains(OpInst))
 150         ensureValueRead(Op, OpStmt);
 151       continue;
 152     }
 153
 154     OnlyNonAffineSubRegionOperands = false;
 155     ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
 156   }
 157
 158   if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
 159     addPHIReadAccess(PHIStmt, PHI);
 160   }
 161 }
 162
 163 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
 164                                          Instruction *Inst) {
 165   assert(!isa<PHINode>(Inst));
 166
 167   // Pull-in required operands.
 168   for (Use &Op : Inst->operands())
 169     ensureValueRead(Op.get(), UserStmt);
 170 }
 171
 172 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
 173   // Check for uses of this instruction outside the scop. Because we do not
 174   // iterate over such instructions and therefore did not "ensure" the existence
 175   // of a write, we must determine such use here.
 176   if (scop->isEscaping(Inst))
 177     ensureValueWrite(Inst);
 178 }
 179
 180 /// Check that a value is a Fortran Array descriptor.
 181 ///
 182 /// We check if V has the following structure:
 183 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
 184 ///                                   [<num> x %struct.descriptor_dimension] }
 185 ///
 186 ///
 187 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
 188 ///
 189 /// 1. V's type name starts with "struct.array"
 190 /// 2. V's type has layout as shown.
 191 /// 3. Final member of V's type has name "struct.descriptor_dimension",
 192 /// 4. "struct.descriptor_dimension" has layout as shown.
 193 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
 194 ///
 195 /// We are interested in such types since this is the code that dragonegg
 196 /// generates for Fortran array descriptors.
 197 ///
 198 /// @param V the Value to be checked.
 199 ///
 200 /// @returns True if V is a Fortran array descriptor, False otherwise.
 201 bool isFortranArrayDescriptor(Value *V) {
 202   PointerType *PTy = dyn_cast<PointerType>(V->getType());
 203
 204   if (!PTy)
 205     return false;
 206
 207   Type *Ty = PTy->getElementType();
 208   assert(Ty && "Ty expected to be initialized");
 209   auto *StructArrTy = dyn_cast<StructType>(Ty);
 210
 211   if (!(StructArrTy && StructArrTy->hasName()))
 212     return false;
 213
 214   if (!StructArrTy->getName().startswith("struct.array"))
 215     return false;
 216
 217   if (StructArrTy->getNumElements() != 4)
 218     return false;
 219
 220   const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
 221
 222   // i8* match
 223   if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
 224     return false;
 225
 226   // Get a reference to the int type and check that all the members
 227   // share the same int type
 228   Type *IntTy = ArrMemberTys[1];
 229   if (ArrMemberTys[2] != IntTy)
 230     return false;
 231
 232   // type: [<num> x %struct.descriptor_dimension]
 233   ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
 234   if (!DescriptorDimArrayTy)
 235     return false;
 236
 237   // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
 238   StructType *DescriptorDimTy =
 239       dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
 240
 241   if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
 242     return false;
 243
 244   if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
 245     return false;
 246
 247   if (DescriptorDimTy->getNumElements() != 3)
 248     return false;
 249
 250   for (auto MemberTy : DescriptorDimTy->elements()) {
 251     if (MemberTy != IntTy)
 252       return false;
 253   }
 254
 255   return true;
 256 }
 257
 258 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
 259   // match: 4.1 & 4.2 store/load
 260   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
 261     return nullptr;
 262
 263   // match: 4
 264   if (Inst.getAlignment() != 8)
 265     return nullptr;
 266
 267   Value *Address = Inst.getPointerOperand();
 268
 269   const BitCastInst *Bitcast = nullptr;
 270   // [match: 3]
 271   if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
 272     Value *TypedMem = Slot->getPointerOperand();
 273     // match: 2
 274     Bitcast = dyn_cast<BitCastInst>(TypedMem);
 275   } else {
 276     // match: 2
 277     Bitcast = dyn_cast<BitCastInst>(Address);
 278   }
 279
 280   if (!Bitcast)
 281     return nullptr;
 282
 283   auto *MallocMem = Bitcast->getOperand(0);
 284
 285   // match: 1
 286   auto *MallocCall = dyn_cast<CallInst>(MallocMem);
 287   if (!MallocCall)
 288     return nullptr;
 289
 290   Function *MallocFn = MallocCall->getCalledFunction();
 291   if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
 292     return nullptr;
 293
 294   // Find all uses the malloc'd memory.
 295   // We are looking for a "store" into a struct with the type being the Fortran
 296   // descriptor type
 297   for (auto user : MallocMem->users()) {
 298     /// match: 5
 299     auto *MallocStore = dyn_cast<StoreInst>(user);
 300     if (!MallocStore)
 301       continue;
 302
 303     auto *DescriptorGEP =
 304         dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
 305     if (!DescriptorGEP)
 306       continue;
 307
 308     // match: 5
 309     auto DescriptorType =
 310         dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
 311     if (!(DescriptorType && DescriptorType->hasName()))
 312       continue;
 313
 314     Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
 315
 316     if (!Descriptor)
 317       continue;
 318
 319     if (!isFortranArrayDescriptor(Descriptor))
 320       continue;
 321
 322     return Descriptor;
 323   }
 324
 325   return nullptr;
 326 }
 327
 328 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
 329   // match: 3
 330   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
 331     return nullptr;
 332
 333   Value *Slot = Inst.getPointerOperand();
 334
 335   LoadInst *MemLoad = nullptr;
 336   // [match: 2]
 337   if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
 338     // match: 1
 339     MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
 340   } else {
 341     // match: 1
 342     MemLoad = dyn_cast<LoadInst>(Slot);
 343   }
 344
 345   if (!MemLoad)
 346     return nullptr;
 347
 348   auto *BitcastOperator =
 349       dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
 350   if (!BitcastOperator)
 351     return nullptr;
 352
 353   Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
 354   if (!Descriptor)
 355     return nullptr;
 356
 357   if (!isFortranArrayDescriptor(Descriptor))
 358     return nullptr;
 359
 360   return Descriptor;
 361 }
 362
 363 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
 364   Value *Val = Inst.getValueOperand();
 365   Type *ElementType = Val->getType();
 366   Value *Address = Inst.getPointerOperand();
 367   const SCEV *AccessFunction =
 368       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
 369   const SCEVUnknown *BasePointer =
 370       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
 371   enum MemoryAccess::AccessType AccType =
 372       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
 373
 374   if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
 375     auto *Src = BitCast->getOperand(0);
 376     auto *SrcTy = Src->getType();
 377     auto *DstTy = BitCast->getType();
 378     // Do not try to delinearize non-sized (opaque) pointers.
 379     if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
 380         (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
 381       return false;
 382     }
 383     if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
 384         DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
 385             DL.getTypeAllocSize(DstTy->getPointerElementType()))
 386       Address = Src;
 387   }
 388
 389   auto *GEP = dyn_cast<GetElementPtrInst>(Address);
 390   if (!GEP)
 391     return false;
 392
 393   std::vector<const SCEV *> Subscripts;
 394   std::vector<int> Sizes;
 395   std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
 396   auto *BasePtr = GEP->getOperand(0);
 397
 398   if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
 399     BasePtr = BasePtrCast->getOperand(0);
 400
 401   // Check for identical base pointers to ensure that we do not miss index
 402   // offsets that have been added before this GEP is applied.
 403   if (BasePtr != BasePointer->getValue())
 404     return false;
 405
 406   std::vector<const SCEV *> SizesSCEV;
 407
 408   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
 409
 410   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
 411   for (auto *Subscript : Subscripts) {
 412     InvariantLoadsSetTy AccessILS;
 413     if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
 414                       &AccessILS))
 415       return false;
 416
 417     for (LoadInst *LInst : AccessILS)
 418       if (!ScopRIL.count(LInst))
 419         return false;
 420   }
 421
 422   if (Sizes.empty())
 423     return false;
 424
 425   SizesSCEV.push_back(nullptr);
 426
 427   for (auto V : Sizes)
 428     SizesSCEV.push_back(SE.getSCEV(
 429         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
 430
 431   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
 432                  true, Subscripts, SizesSCEV, Val);
 433   return true;
 434 }
 435
 436 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
 437   if (!PollyDelinearize)
 438     return false;
 439
 440   Value *Address = Inst.getPointerOperand();
 441   Value *Val = Inst.getValueOperand();
 442   Type *ElementType = Val->getType();
 443   unsigned ElementSize = DL.getTypeAllocSize(ElementType);
 444   enum MemoryAccess::AccessType AccType =
 445       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
 446
 447   const SCEV *AccessFunction =
 448       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
 449   const SCEVUnknown *BasePointer =
 450       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
 451
 452   assert(BasePointer && "Could not find base pointer");
 453
 454   auto &InsnToMemAcc = scop->getInsnToMemAccMap();
 455   auto AccItr = InsnToMemAcc.find(Inst);
 456   if (AccItr == InsnToMemAcc.end())
 457     return false;
 458
 459   std::vector<const SCEV *> Sizes = {nullptr};
 460
 461   Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
 462                AccItr->second.Shape->DelinearizedSizes.end());
 463
 464   // In case only the element size is contained in the 'Sizes' array, the
 465   // access does not access a real multi-dimensional array. Hence, we allow
 466   // the normal single-dimensional access construction to handle this.
 467   if (Sizes.size() == 1)
 468     return false;
 469
 470   // Remove the element size. This information is already provided by the
 471   // ElementSize parameter. In case the element size of this access and the
 472   // element size used for delinearization differs the delinearization is
 473   // incorrect. Hence, we invalidate the scop.
 474   //
 475   // TODO: Handle delinearization with differing element sizes.
 476   auto DelinearizedSize =
 477       cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
 478   Sizes.pop_back();
 479   if (ElementSize != DelinearizedSize)
 480     scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
 481
 482   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
 483                  true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
 484   return true;
 485 }
 486
 487 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
 488   auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
 489
 490   if (MemIntr == nullptr)
 491     return false;
 492
 493   auto *L = LI.getLoopFor(Inst->getParent());
 494   auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
 495   assert(LengthVal);
 496
 497   // Check if the length val is actually affine or if we overapproximate it
 498   InvariantLoadsSetTy AccessILS;
 499   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
 500
 501   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
 502   bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
 503                                      LengthVal, SE, &AccessILS);
 504   for (LoadInst *LInst : AccessILS)
 505     if (!ScopRIL.count(LInst))
 506       LengthIsAffine = false;
 507   if (!LengthIsAffine)
 508     LengthVal = nullptr;
 509
 510   auto *DestPtrVal = MemIntr->getDest();
 511   assert(DestPtrVal);
 512
 513   auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
 514   assert(DestAccFunc);
 515   // Ignore accesses to "NULL".
 516   // TODO: We could use this to optimize the region further, e.g., intersect
 517   //       the context with
 518   //          isl_set_complement(isl_set_params(getDomain()))
 519   //       as we know it would be undefined to execute this instruction anyway.
 520   if (DestAccFunc->isZero())
 521     return true;
 522
 523   auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
 524   assert(DestPtrSCEV);
 525   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
 526   addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
 527                  IntegerType::getInt8Ty(DestPtrVal->getContext()),
 528                  LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
 529                  Inst.getValueOperand());
 530
 531   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
 532   if (!MemTrans)
 533     return true;
 534
 535   auto *SrcPtrVal = MemTrans->getSource();
 536   assert(SrcPtrVal);
 537
 538   auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
 539   assert(SrcAccFunc);
 540   // Ignore accesses to "NULL".
 541   // TODO: See above TODO
 542   if (SrcAccFunc->isZero())
 543     return true;
 544
 545   auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
 546   assert(SrcPtrSCEV);
 547   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
 548   addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
 549                  IntegerType::getInt8Ty(SrcPtrVal->getContext()),
 550                  LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
 551                  Inst.getValueOperand());
 552
 553   return true;
 554 }
 555
 556 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
 557   auto *CI = dyn_cast_or_null<CallInst>(Inst);
 558
 559   if (CI == nullptr)
 560     return false;
 561
 562   if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
 563     return true;
 564
 565   bool ReadOnly = false;
 566   auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
 567   auto *CalledFunction = CI->getCalledFunction();
 568   switch (AA.getModRefBehavior(CalledFunction)) {
 569   case FMRB_UnknownModRefBehavior:
 570     llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
 571   case FMRB_DoesNotAccessMemory:
 572     return true;
 573   case FMRB_DoesNotReadMemory:
 574   case FMRB_OnlyAccessesInaccessibleMem:
 575   case FMRB_OnlyAccessesInaccessibleOrArgMem:
 576     return false;
 577   case FMRB_OnlyReadsMemory:
 578     GlobalReads.emplace_back(Stmt, CI);
 579     return true;
 580   case FMRB_OnlyReadsArgumentPointees:
 581     ReadOnly = true;
 582   // Fall through
 583   case FMRB_OnlyAccessesArgumentPointees: {
 584     auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
 585     Loop *L = LI.getLoopFor(Inst->getParent());
 586     for (const auto &Arg : CI->arg_operands()) {
 587       if (!Arg->getType()->isPointerTy())
 588         continue;
 589
 590       auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
 591       if (ArgSCEV->isZero())
 592         continue;
 593
 594       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
 595       addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
 596                      ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
 597     }
 598     return true;
 599   }
 600   }
 601
 602   return true;
 603 }
 604
 605 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
 606   Value *Address = Inst.getPointerOperand();
 607   Value *Val = Inst.getValueOperand();
 608   Type *ElementType = Val->getType();
 609   enum MemoryAccess::AccessType AccType =
 610       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
 611
 612   const SCEV *AccessFunction =
 613       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
 614   const SCEVUnknown *BasePointer =
 615       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
 616
 617   assert(BasePointer && "Could not find base pointer");
 618   AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
 619
 620   // Check if the access depends on a loop contained in a non-affine subregion.
 621   bool isVariantInNonAffineLoop = false;
 622   SetVector<const Loop *> Loops;
 623   findLoops(AccessFunction, Loops);
 624   for (const Loop *L : Loops)
 625     if (Stmt->contains(L)) {
 626       isVariantInNonAffineLoop = true;
 627       break;
 628     }
 629
 630   InvariantLoadsSetTy AccessILS;
 631
 632   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
 633   bool IsAffine = !isVariantInNonAffineLoop &&
 634                   isAffineExpr(&scop->getRegion(), SurroundingLoop,
 635                                AccessFunction, SE, &AccessILS);
 636
 637   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
 638   for (LoadInst *LInst : AccessILS)
 639     if (!ScopRIL.count(LInst))
 640       IsAffine = false;
 641
 642   if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
 643     AccType = MemoryAccess::MAY_WRITE;
 644
 645   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
 646                  IsAffine, {AccessFunction}, {nullptr}, Val);
 647 }
 648
 649 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
 650   if (buildAccessMemIntrinsic(Inst, Stmt))
 651     return;
 652
 653   if (buildAccessCallInst(Inst, Stmt))
 654     return;
 655
 656   if (buildAccessMultiDimFixed(Inst, Stmt))
 657     return;
 658
 659   if (buildAccessMultiDimParam(Inst, Stmt))
 660     return;
 661
 662   buildAccessSingleDim(Inst, Stmt);
 663 }
 664
 665 void ScopBuilder::buildAccessFunctions() {
 666   for (auto &Stmt : *scop) {
 667     if (Stmt.isBlockStmt()) {
 668       buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
 669       continue;
 670     }
 671
 672     Region *R = Stmt.getRegion();
 673     for (BasicBlock *BB : R->blocks())
 674       buildAccessFunctions(&Stmt, *BB, R);
 675   }
 676
 677   // Build write accesses for values that are used after the SCoP.
 678   // The instructions defining them might be synthesizable and therefore not
 679   // contained in any statement, hence we iterate over the original instructions
 680   // to identify all escaping values.
 681   for (BasicBlock *BB : scop->getRegion().blocks()) {
 682     for (Instruction &Inst : *BB)
 683       buildEscapingDependences(&Inst);
 684   }
 685 }
 686
 687 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
 688   return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
 689          !canSynthesize(Inst, *scop, &SE, L);
 690 }
 691
 692 /// Generate a name for a statement.
 693 ///
 694 /// @param S     The parent SCoP.
 695 /// @param BB    The basic block the statement will represent.
 696 /// @param Count The index of the created statement in @p BB.
 697 static std::string makeStmtName(Scop *S, BasicBlock *BB, int Count) {
 698   std::string Suffix = "";
 699   if (Count != 0)
 700     Suffix += std::to_string(Count);
 701   return getIslCompatibleName("Stmt", BB, S->getNextStmtIdx(), Suffix,
 702                               UseInstructionNames);
 703 }
 704
 705 /// Generate a name for a statement that represents a non-affine subregion.
 706 ///
 707 /// @param S The parent SCoP.
 708 /// @param R The region the statement will represent.
 709 static std::string makeStmtName(Scop *S, Region *R) {
 710   return getIslCompatibleName("Stmt", R->getNameStr(), S->getNextStmtIdx(), "",
 711                               UseInstructionNames);
 712 }
 713
 714 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
 715   Loop *SurroundingLoop = LI.getLoopFor(BB);
 716
 717   int Count = 0;
 718   std::vector<Instruction *> Instructions;
 719   for (Instruction &Inst : *BB) {
 720     if (shouldModelInst(&Inst, SurroundingLoop))
 721       Instructions.push_back(&Inst);
 722     if (Inst.getMetadata("polly_split_after") ||
 723         (SplitOnStore && isa<StoreInst>(Inst))) {
 724       std::string Name = makeStmtName(scop.get(), BB, Count);
 725       scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
 726       Count++;
 727       Instructions.clear();
 728     }
 729   }
 730
 731   std::string Name = makeStmtName(scop.get(), BB, Count);
 732   scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
 733 }
 734
 735 /// Is @p Inst an ordered instruction?
 736 ///
 737 /// An unordered instruction is an instruction, such that a sequence of
 738 /// unordered instructions can be permuted without changing semantics. Any
 739 /// instruction for which this is not always the case is ordered.
 740 static bool isOrderedInstruction(Instruction *Inst) {
 741   return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
 742 }
 743
 744 /// Join instructions to the same statement if one uses the scalar result of the
 745 /// other.
 746 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
 747                             ArrayRef<Instruction *> ModeledInsts) {
 748   for (Instruction *Inst : ModeledInsts) {
 749     if (isa<PHINode>(Inst))
 750       continue;
 751
 752     for (Use &Op : Inst->operands()) {
 753       Instruction *OpInst = dyn_cast<Instruction>(Op.get());
 754       if (!OpInst)
 755         continue;
 756
 757       // Check if OpInst is in the BB and is a modeled instruction.
 758       auto OpVal = UnionFind.findValue(OpInst);
 759       if (OpVal == UnionFind.end())
 760         continue;
 761
 762       UnionFind.unionSets(Inst, OpInst);
 763     }
 764   }
 765 }
 766
 767 /// Join instructions that are used as incoming value in successor PHIs into the
 768 /// epilogue.
 769 static void
 770 joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
 771                                   ArrayRef<Instruction *> ModeledInsts,
 772                                   BasicBlock *BB) {
 773   for (BasicBlock *Succ : successors(BB)) {
 774     for (Instruction &SuccInst : *Succ) {
 775       PHINode *SuccPHI = dyn_cast<PHINode>(&SuccInst);
 776       if (!SuccPHI)
 777         break;
 778
 779       Value *IncomingVal = SuccPHI->getIncomingValueForBlock(BB);
 780       Instruction *IncomingInst = dyn_cast<Instruction>(IncomingVal);
 781       if (!IncomingInst)
 782         continue;
 783       if (IncomingInst->getParent() != BB)
 784         continue;
 785       if (UnionFind.findValue(IncomingInst) == UnionFind.end())
 786         continue;
 787
 788       UnionFind.unionSets(nullptr, IncomingInst);
 789     }
 790   }
 791 }
 792
 793 /// Ensure that the order of ordered instructions does not change.
 794 ///
 795 /// If we encounter an ordered instruction enclosed in instructions belonging to
 796 /// a different statement (which might as well contain ordered instructions, but
 797 /// this is not tested here), join them.
 798 static void
 799 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
 800                         ArrayRef<Instruction *> ModeledInsts) {
 801   SetVector<Instruction *> SeenLeaders;
 802   for (Instruction *Inst : ModeledInsts) {
 803     if (!isOrderedInstruction(Inst))
 804       continue;
 805
 806     Instruction *Leader = UnionFind.getLeaderValue(Inst);
 807     bool Inserted = SeenLeaders.insert(Leader);
 808     if (Inserted)
 809       continue;
 810
 811     // Merge statements to close holes. Say, we have already seen statements A
 812     // and B, in this order. Then we see an instruction of A again and we would
 813     // see the pattern "A B A". This function joins all statements until the
 814     // only seen occurrence of A.
 815     for (Instruction *Prev : reverse(SeenLeaders)) {
 816       // Items added to 'SeenLeaders' are leaders, but may have lost their
 817       // leadership status when merged into another statement.
 818       Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
 819       if (PrevLeader == Leader)
 820         break;
 821       UnionFind.unionSets(Prev, Leader);
 822     }
 823   }
 824 }
 825
 826 /// Also ensure that the epilogue is the last statement relative to all ordered
 827 /// instructions.
 828 ///
 829 /// This is basically joinOrderedInstructions() but using the epilogue as
 830 /// 'ordered instruction'.
 831 static void joinAllAfterEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
 832                                  ArrayRef<Instruction *> ModeledInsts) {
 833   bool EpilogueSeen = false;
 834   for (Instruction *Inst : ModeledInsts) {
 835     auto PHIWritesLeader = UnionFind.findLeader(nullptr);
 836     auto InstLeader = UnionFind.findLeader(Inst);
 837
 838     if (PHIWritesLeader == InstLeader)
 839       EpilogueSeen = true;
 840
 841     if (!isOrderedInstruction(Inst))
 842       continue;
 843
 844     if (EpilogueSeen)
 845       UnionFind.unionSets(PHIWritesLeader, InstLeader);
 846   }
 847 }
 848
 849 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
 850   Loop *L = LI.getLoopFor(BB);
 851
 852   // Extracting out modeled instructions saves us from checking
 853   // shouldModelInst() repeatedly.
 854   SmallVector<Instruction *, 32> ModeledInsts;
 855   EquivalenceClasses<Instruction *> UnionFind;
 856   for (Instruction &Inst : *BB) {
 857     if (!shouldModelInst(&Inst, L))
 858       continue;
 859     ModeledInsts.push_back(&Inst);
 860     UnionFind.insert(&Inst);
 861   }
 862
 863   // 'nullptr' represents the last statement for a basic block. It contains no
 864   // instructions, but holds the PHI write accesses for successor basic blocks.
 865   // If a PHI has an incoming value defined in this BB, it can also be merged
 866   // with other statements.
 867   // TODO: We wouldn't need this if we would add PHIWrites into the statement
 868   // that defines the incoming value (if in the BB) instead of always the last,
 869   // so we could unconditionally always add a last statement.
 870   UnionFind.insert(nullptr);
 871
 872   joinOperandTree(UnionFind, ModeledInsts);
 873   joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB);
 874   joinOrderedInstructions(UnionFind, ModeledInsts);
 875   joinAllAfterEpilogue(UnionFind, ModeledInsts);
 876
 877   // The list of instructions for statement (statement represented by the leader
 878   // instruction). The order of statements instructions is reversed such that
 879   // the epilogue is first. This makes it easier to ensure that the epilogue is
 880   // the last statement.
 881   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
 882
 883   // Ensure that the epilogue is last.
 884   LeaderToInstList[nullptr];
 885
 886   // Collect the instructions of all leaders. UnionFind's member iterator
 887   // unfortunately are not in any specific order.
 888   for (Instruction &Inst : reverse(*BB)) {
 889     auto LeaderIt = UnionFind.findLeader(&Inst);
 890     if (LeaderIt == UnionFind.member_end())
 891       continue;
 892
 893     std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
 894     InstList.push_back(&Inst);
 895   }
 896
 897   // Finally build the statements.
 898   int Count = 0;
 899   for (auto &Instructions : reverse(LeaderToInstList)) {
 900     std::vector<Instruction *> &InstList = Instructions.second;
 901     std::reverse(InstList.begin(), InstList.end());
 902     std::string Name = makeStmtName(scop.get(), BB, Count);
 903     scop->addScopStmt(BB, Name, L, std::move(InstList));
 904     Count += 1;
 905   }
 906 }
 907
 908 void ScopBuilder::buildStmts(Region &SR) {
 909   if (scop->isNonAffineSubRegion(&SR)) {
 910     std::vector<Instruction *> Instructions;
 911     Loop *SurroundingLoop =
 912         getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
 913     for (Instruction &Inst : *SR.getEntry())
 914       if (shouldModelInst(&Inst, SurroundingLoop))
 915         Instructions.push_back(&Inst);
 916     std::string Name = makeStmtName(scop.get(), &SR);
 917     scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
 918     return;
 919   }
 920
 921   for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
 922     if (I->isSubRegion())
 923       buildStmts(*I->getNodeAs<Region>());
 924     else {
 925       BasicBlock *BB = I->getNodeAs<BasicBlock>();
 926       switch (StmtGranularity) {
 927       case GranularityChoice::BasicBlocks:
 928         buildSequentialBlockStmts(BB);
 929         break;
 930       case GranularityChoice::ScalarIndependence:
 931         buildEqivClassBlockStmts(BB);
 932         break;
 933       case GranularityChoice::Stores:
 934         buildSequentialBlockStmts(BB, true);
 935         break;
 936       }
 937     }
 938 }
 939
 940 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
 941                                        Region *NonAffineSubRegion) {
 942   assert(
 943       Stmt &&
 944       "The exit BB is the only one that cannot be represented by a statement");
 945   assert(Stmt->represents(&BB));
 946
 947   // We do not build access functions for error blocks, as they may contain
 948   // instructions we can not model.
 949   if (isErrorBlock(BB, scop->getRegion(), LI, DT))
 950     return;
 951
 952   auto BuildAccessesForInst = [this, Stmt,
 953                                NonAffineSubRegion](Instruction *Inst) {
 954     PHINode *PHI = dyn_cast<PHINode>(Inst);
 955     if (PHI)
 956       buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
 957
 958     if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
 959       assert(Stmt && "Cannot build access function in non-existing statement");
 960       buildMemoryAccess(MemInst, Stmt);
 961     }
 962
 963     // PHI nodes have already been modeled above and TerminatorInsts that are
 964     // not part of a non-affine subregion are fully modeled and regenerated
 965     // from the polyhedral domains. Hence, they do not need to be modeled as
 966     // explicit data dependences.
 967     if (!PHI)
 968       buildScalarDependences(Stmt, Inst);
 969   };
 970
 971   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
 972   bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
 973   if (IsEntryBlock) {
 974     for (Instruction *Inst : Stmt->getInstructions())
 975       BuildAccessesForInst(Inst);
 976     if (Stmt->isRegionStmt())
 977       BuildAccessesForInst(BB.getTerminator());
 978   } else {
 979     for (Instruction &Inst : BB) {
 980       if (isIgnoredIntrinsic(&Inst))
 981         continue;
 982
 983       // Invariant loads already have been processed.
 984       if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
 985         continue;
 986
 987       BuildAccessesForInst(&Inst);
 988     }
 989   }
 990 }
 991
 992 MemoryAccess *ScopBuilder::addMemoryAccess(
 993     ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
 994     Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
 995     ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
 996     MemoryKind Kind) {
 997   bool isKnownMustAccess = false;
 998
 999   // Accesses in single-basic block statements are always executed.
1000   if (Stmt->isBlockStmt())
1001     isKnownMustAccess = true;
1002
1003   if (Stmt->isRegionStmt()) {
1004     // Accesses that dominate the exit block of a non-affine region are always
1005     // executed. In non-affine regions there may exist MemoryKind::Values that
1006     // do not dominate the exit. MemoryKind::Values will always dominate the
1007     // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
1008     // non-affine region.
1009     if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
1010       isKnownMustAccess = true;
1011   }
1012
1013   // Non-affine PHI writes do not "happen" at a particular instruction, but
1014   // after exiting the statement. Therefore they are guaranteed to execute and
1015   // overwrite the old value.
1016   if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
1017     isKnownMustAccess = true;
1018
1019   if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
1020     AccType = MemoryAccess::MAY_WRITE;
1021
1022   auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
1023                                   Affine, Subscripts, Sizes, AccessValue, Kind);
1024
1025   scop->addAccessFunction(Access);
1026   Stmt->addAccess(Access);
1027   return Access;
1028 }
1029
1030 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
1031                                  MemoryAccess::AccessType AccType,
1032                                  Value *BaseAddress, Type *ElementType,
1033                                  bool IsAffine,
1034                                  ArrayRef<const SCEV *> Subscripts,
1035                                  ArrayRef<const SCEV *> Sizes,
1036                                  Value *AccessValue) {
1037   ArrayBasePointers.insert(BaseAddress);
1038   auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
1039                                     ElementType, IsAffine, AccessValue,
1040                                     Subscripts, Sizes, MemoryKind::Array);
1041
1042   if (!DetectFortranArrays)
1043     return;
1044
1045   if (Value *FAD = findFADAllocationInvisible(MemAccInst))
1046     MemAccess->setFortranArrayDescriptor(FAD);
1047   else if (Value *FAD = findFADAllocationVisible(MemAccInst))
1048     MemAccess->setFortranArrayDescriptor(FAD);
1049 }
1050
1051 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
1052   // Find the statement that defines the value of Inst. That statement has to
1053   // write the value to make it available to those statements that read it.
1054   ScopStmt *Stmt = scop->getStmtFor(Inst);
1055
1056   // It is possible that the value is synthesizable within a loop (such that it
1057   // is not part of any statement), but not after the loop (where you need the
1058   // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1059   // avoid this. In case the IR has no such PHI, use the last statement (where
1060   // the value is synthesizable) to write the value.
1061   if (!Stmt)
1062     Stmt = scop->getLastStmtFor(Inst->getParent());
1063
1064   // Inst not defined within this SCoP.
1065   if (!Stmt)
1066     return;
1067
1068   // Do not process further if the instruction is already written.
1069   if (Stmt->lookupValueWriteOf(Inst))
1070     return;
1071
1072   addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
1073                   true, Inst, ArrayRef<const SCEV *>(),
1074                   ArrayRef<const SCEV *>(), MemoryKind::Value);
1075 }
1076
1077 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
1078   // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1079   // to be able to replace this one. Currently, there is a split responsibility.
1080   // In a first step, the MemoryAccess is created, but without the
1081   // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1082   // AccessRelation is created. At least for scalar accesses, there is no new
1083   // information available at ScopStmt::buildAccessRelations(), so we could
1084   // create the AccessRelation right away. This is what
1085   // ScopStmt::ensureValueRead(Value*) does.
1086
1087   auto *Scope = UserStmt->getSurroundingLoop();
1088   auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
1089   switch (VUse.getKind()) {
1090   case VirtualUse::Constant:
1091   case VirtualUse::Block:
1092   case VirtualUse::Synthesizable:
1093   case VirtualUse::Hoisted:
1094   case VirtualUse::Intra:
1095     // Uses of these kinds do not need a MemoryAccess.
1096     break;
1097
1098   case VirtualUse::ReadOnly:
1099     // Add MemoryAccess for invariant values only if requested.
1100     if (!ModelReadOnlyScalars)
1101       break;
1102
1103     LLVM_FALLTHROUGH;
1104   case VirtualUse::Inter:
1105
1106     // Do not create another MemoryAccess for reloading the value if one already
1107     // exists.
1108     if (UserStmt->lookupValueReadOf(V))
1109       break;
1110
1111     addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
1112                     true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1113                     MemoryKind::Value);
1114
1115     // Inter-statement uses need to write the value in their defining statement.
1116     if (VUse.isInter())
1117       ensureValueWrite(cast<Instruction>(V));
1118     break;
1119   }
1120 }
1121
1122 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
1123                                  BasicBlock *IncomingBlock,
1124                                  Value *IncomingValue, bool IsExitBlock) {
1125   // As the incoming block might turn out to be an error statement ensure we
1126   // will create an exit PHI SAI object. It is needed during code generation
1127   // and would be created later anyway.
1128   if (IsExitBlock)
1129     scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
1130                                    MemoryKind::ExitPHI);
1131
1132   // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1133   // from outside the SCoP's region have no statement representation.
1134   if (!IncomingStmt)
1135     return;
1136
1137   // Take care for the incoming value being available in the incoming block.
1138   // This must be done before the check for multiple PHI writes because multiple
1139   // exiting edges from subregion each can be the effective written value of the
1140   // subregion. As such, all of them must be made available in the subregion
1141   // statement.
1142   ensureValueRead(IncomingValue, IncomingStmt);
1143
1144   // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1145   if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
1146     assert(Acc->getAccessInstruction() == PHI);
1147     Acc->addIncoming(IncomingBlock, IncomingValue);
1148     return;
1149   }
1150
1151   MemoryAccess *Acc = addMemoryAccess(
1152       IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
1153       PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1154       IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
1155   assert(Acc);
1156   Acc->addIncoming(IncomingBlock, IncomingValue);
1157 }
1158
1159 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
1160   addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
1161                   PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1162                   MemoryKind::PHI);
1163 }
1164
1165 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
1166   isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
1167
1168   Stmt.Domain = scop->getDomainConditions(&Stmt);
1169   Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
1170 }
1171
1172 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
1173   isl::set Domain = Stmt.getDomain();
1174   for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1175     isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1176     Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
1177   }
1178 }
1179
1180 /// Return the reduction type for a given binary operator.
1181 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
1182                                                     const Instruction *Load) {
1183   if (!BinOp)
1184     return MemoryAccess::RT_NONE;
1185   switch (BinOp->getOpcode()) {
1186   case Instruction::FAdd:
1187     if (!BinOp->isFast())
1188       return MemoryAccess::RT_NONE;
1189     // Fall through
1190   case Instruction::Add:
1191     return MemoryAccess::RT_ADD;
1192   case Instruction::Or:
1193     return MemoryAccess::RT_BOR;
1194   case Instruction::Xor:
1195     return MemoryAccess::RT_BXOR;
1196   case Instruction::And:
1197     return MemoryAccess::RT_BAND;
1198   case Instruction::FMul:
1199     if (!BinOp->isFast())
1200       return MemoryAccess::RT_NONE;
1201     // Fall through
1202   case Instruction::Mul:
1203     if (DisableMultiplicativeReductions)
1204       return MemoryAccess::RT_NONE;
1205     return MemoryAccess::RT_MUL;
1206   default:
1207     return MemoryAccess::RT_NONE;
1208   }
1209 }
1210
1211 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1212   SmallVector<MemoryAccess *, 2> Loads;
1213   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1214
1215   // First collect candidate load-store reduction chains by iterating over all
1216   // stores and collecting possible reduction loads.
1217   for (MemoryAccess *StoreMA : Stmt) {
1218     if (StoreMA->isRead())
1219       continue;
1220
1221     Loads.clear();
1222     collectCandidateReductionLoads(StoreMA, Loads);
1223     for (MemoryAccess *LoadMA : Loads)
1224       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1225   }
1226
1227   // Then check each possible candidate pair.
1228   for (const auto &CandidatePair : Candidates) {
1229     bool Valid = true;
1230     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1231     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1232
1233     // Skip those with obviously unequal base addresses.
1234     if (!LoadAccs.has_equal_space(StoreAccs)) {
1235       continue;
1236     }
1237
1238     // And check if the remaining for overlap with other memory accesses.
1239     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1240     AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1241     isl::set AllAccs = AllAccsRel.range();
1242
1243     for (MemoryAccess *MA : Stmt) {
1244       if (MA == CandidatePair.first || MA == CandidatePair.second)
1245         continue;
1246
1247       isl::map AccRel =
1248           MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1249       isl::set Accs = AccRel.range();
1250
1251       if (AllAccs.has_equal_space(Accs)) {
1252         isl::set OverlapAccs = Accs.intersect(AllAccs);
1253         Valid = Valid && OverlapAccs.is_empty();
1254       }
1255     }
1256
1257     if (!Valid)
1258       continue;
1259
1260     const LoadInst *Load =
1261         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1262     MemoryAccess::ReductionType RT =
1263         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1264
1265     // If no overlapping access was found we mark the load and store as
1266     // reduction like.
1267     CandidatePair.first->markAsReductionLike(RT);
1268     CandidatePair.second->markAsReductionLike(RT);
1269   }
1270 }
1271
1272 void ScopBuilder::collectCandidateReductionLoads(
1273     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1274   ScopStmt *Stmt = StoreMA->getStatement();
1275
1276   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1277   if (!Store)
1278     return;
1279
1280   // Skip if there is not one binary operator between the load and the store
1281   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1282   if (!BinOp)
1283     return;
1284
1285   // Skip if the binary operators has multiple uses
1286   if (BinOp->getNumUses() != 1)
1287     return;
1288
1289   // Skip if the opcode of the binary operator is not commutative/associative
1290   if (!BinOp->isCommutative() || !BinOp->isAssociative())
1291     return;
1292
1293   // Skip if the binary operator is outside the current SCoP
1294   if (BinOp->getParent() != Store->getParent())
1295     return;
1296
1297   // Skip if it is a multiplicative reduction and we disabled them
1298   if (DisableMultiplicativeReductions &&
1299       (BinOp->getOpcode() == Instruction::Mul ||
1300        BinOp->getOpcode() == Instruction::FMul))
1301     return;
1302
1303   // Check the binary operator operands for a candidate load
1304   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1305   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1306   if (!PossibleLoad0 && !PossibleLoad1)
1307     return;
1308
1309   // A load is only a candidate if it cannot escape (thus has only this use)
1310   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1311     if (PossibleLoad0->getParent() == Store->getParent())
1312       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1313   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1314     if (PossibleLoad1->getParent() == Store->getParent())
1315       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1316 }
1317
1318 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1319   for (MemoryAccess *Access : Stmt.MemAccs) {
1320     Type *ElementType = Access->getElementType();
1321
1322     MemoryKind Ty;
1323     if (Access->isPHIKind())
1324       Ty = MemoryKind::PHI;
1325     else if (Access->isExitPHIKind())
1326       Ty = MemoryKind::ExitPHI;
1327     else if (Access->isValueKind())
1328       Ty = MemoryKind::Value;
1329     else
1330       Ty = MemoryKind::Array;
1331
1332     auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1333                                                ElementType, Access->Sizes, Ty);
1334     Access->buildAccessRelation(SAI);
1335     scop->addAccessData(Access);
1336   }
1337 }
1338
1339 #ifndef NDEBUG
1340 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1341   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1342   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1343   assert(PhysUse.getKind() == VirtUse.getKind());
1344 }
1345
1346 /// Check the consistency of every statement's MemoryAccesses.
1347 ///
1348 /// The check is carried out by expecting the "physical" kind of use (derived
1349 /// from the BasicBlocks instructions resides in) to be same as the "virtual"
1350 /// kind of use (derived from a statement's MemoryAccess).
1351 ///
1352 /// The "physical" uses are taken by ensureValueRead to determine whether to
1353 /// create MemoryAccesses. When done, the kind of scalar access should be the
1354 /// same no matter which way it was derived.
1355 ///
1356 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1357 /// can intentionally influence on the kind of uses (not corresponding to the
1358 /// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1359 /// to pick up the virtual uses. But here in the code generator, this has not
1360 /// happened yet, such that virtual and physical uses are equivalent.
1361 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1362   for (auto *BB : S->getRegion().blocks()) {
1363     for (auto &Inst : *BB) {
1364       auto *Stmt = S->getStmtFor(&Inst);
1365       if (!Stmt)
1366         continue;
1367
1368       if (isIgnoredIntrinsic(&Inst))
1369         continue;
1370
1371       // Branch conditions are encoded in the statement domains.
1372       if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
1373         continue;
1374
1375       // Verify all uses.
1376       for (auto &Op : Inst.operands())
1377         verifyUse(S, Op, LI);
1378
1379       // Stores do not produce values used by other statements.
1380       if (isa<StoreInst>(Inst))
1381         continue;
1382
1383       // For every value defined in the block, also check that a use of that
1384       // value in the same statement would not be an inter-statement use. It can
1385       // still be synthesizable or load-hoisted, but these kind of instructions
1386       // are not directly copied in code-generation.
1387       auto VirtDef =
1388           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1389       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1390              VirtDef.getKind() == VirtualUse::Intra ||
1391              VirtDef.getKind() == VirtualUse::Hoisted);
1392     }
1393   }
1394
1395   if (S->hasSingleExitEdge())
1396     return;
1397
1398   // PHINodes in the SCoP region's exit block are also uses to be checked.
1399   if (!S->getRegion().isTopLevelRegion()) {
1400     for (auto &Inst : *S->getRegion().getExit()) {
1401       if (!isa<PHINode>(Inst))
1402         break;
1403
1404       for (auto &Op : Inst.operands())
1405         verifyUse(S, Op, LI);
1406     }
1407   }
1408 }
1409 #endif
1410
1411 /// Return the block that is the representing block for @p RN.
1412 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1413   return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1414                            : RN->getNodeAs<BasicBlock>();
1415 }
1416
1417 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1418                             OptimizationRemarkEmitter &ORE) {
1419   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1420
1421   buildStmts(R);
1422
1423   // Create all invariant load instructions first. These are categorized as
1424   // 'synthesizable', therefore are not part of any ScopStmt but need to be
1425   // created somewhere.
1426   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1427   for (BasicBlock *BB : scop->getRegion().blocks()) {
1428     if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1429       continue;
1430
1431     for (Instruction &Inst : *BB) {
1432       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1433       if (!Load)
1434         continue;
1435
1436       if (!RIL.count(Load))
1437         continue;
1438
1439       // Invariant loads require a MemoryAccess to be created in some statement.
1440       // It is not important to which statement the MemoryAccess is added
1441       // because it will later be removed from the ScopStmt again. We chose the
1442       // first statement of the basic block the LoadInst is in.
1443       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1444       assert(!List.empty());
1445       ScopStmt *RILStmt = List.front();
1446       buildMemoryAccess(Load, RILStmt);
1447     }
1448   }
1449   buildAccessFunctions();
1450
1451   // In case the region does not have an exiting block we will later (during
1452   // code generation) split the exit block. This will move potential PHI nodes
1453   // from the current exit block into the new region exiting block. Hence, PHI
1454   // nodes that are at this point not part of the region will be.
1455   // To handle these PHI nodes later we will now model their operands as scalar
1456   // accesses. Note that we do not model anything in the exit block if we have
1457   // an exiting block in the region, as there will not be any splitting later.
1458   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1459     for (Instruction &Inst : *R.getExit()) {
1460       PHINode *PHI = dyn_cast<PHINode>(&Inst);
1461       if (!PHI)
1462         break;
1463
1464       buildPHIAccesses(nullptr, PHI, nullptr, true);
1465     }
1466   }
1467
1468   // Create memory accesses for global reads since all arrays are now known.
1469   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1470   for (auto GlobalReadPair : GlobalReads) {
1471     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1472     Instruction *GlobalRead = GlobalReadPair.second;
1473     for (auto *BP : ArrayBasePointers)
1474       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1475                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1476   }
1477
1478   scop->buildInvariantEquivalenceClasses();
1479
1480   /// A map from basic blocks to their invalid domains.
1481   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1482
1483   if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1484     DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1485     return;
1486   }
1487
1488   scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1489
1490   // Initialize the invalid domain.
1491   for (ScopStmt &Stmt : scop->Stmts)
1492     if (Stmt.isBlockStmt())
1493       Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1494     else
1495       Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1496           Stmt.getRegion()->getNode())]);
1497
1498   // Remove empty statements.
1499   // Exit early in case there are no executable statements left in this scop.
1500   scop->removeStmtNotInDomainMap();
1501   scop->simplifySCoP(false);
1502   if (scop->isEmpty()) {
1503     DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1504     return;
1505   }
1506
1507   // The ScopStmts now have enough information to initialize themselves.
1508   for (ScopStmt &Stmt : *scop) {
1509     buildDomain(Stmt);
1510     collectSurroundingLoops(Stmt);
1511     buildAccessRelations(Stmt);
1512
1513     if (DetectReductions)
1514       checkForReductions(Stmt);
1515   }
1516
1517   // Check early for a feasible runtime context.
1518   if (!scop->hasFeasibleRuntimeContext()) {
1519     DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1520     return;
1521   }
1522
1523   // Check early for profitability. Afterwards it cannot change anymore,
1524   // only the runtime context could become infeasible.
1525   if (!scop->isProfitable(UnprofitableScalarAccs)) {
1526     scop->invalidate(PROFITABLE, DebugLoc());
1527     DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1528     return;
1529   }
1530
1531   scop->buildSchedule(LI);
1532
1533   scop->finalizeAccesses();
1534
1535   scop->realignParams();
1536   scop->addUserContext();
1537
1538   // After the context was fully constructed, thus all our knowledge about
1539   // the parameters is in there, we add all recorded assumptions to the
1540   // assumed/invalid context.
1541   scop->addRecordedAssumptions();
1542
1543   scop->simplifyContexts();
1544   if (!scop->buildAliasChecks(AA)) {
1545     DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1546     return;
1547   }
1548
1549   scop->hoistInvariantLoads();
1550   scop->canonicalizeDynamicBasePtrs();
1551   scop->verifyInvariantLoads();
1552   scop->simplifySCoP(true);
1553
1554   // Check late for a feasible runtime context because profitability did not
1555   // change.
1556   if (!scop->hasFeasibleRuntimeContext()) {
1557     DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1558     return;
1559   }
1560
1561 #ifndef NDEBUG
1562   verifyUses(scop.get(), LI, DT);
1563 #endif
1564 }
1565
1566 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1567                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1568                          ScopDetection &SD, ScalarEvolution &SE,
1569                          OptimizationRemarkEmitter &ORE)
1570     : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1571   DebugLoc Beg, End;
1572   auto P = getBBPairForRegion(R);
1573   getDebugLocations(P, Beg, End);
1574
1575   std::string Msg = "SCoP begins here.";
1576   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1577            << Msg);
1578
1579   buildScop(*R, AC, ORE);
1580
1581   DEBUG(dbgs() << *scop);
1582
1583   if (!scop->hasFeasibleRuntimeContext()) {
1584     InfeasibleScops++;
1585     Msg = "SCoP ends here but was dismissed.";
1586     DEBUG(dbgs() << "SCoP detected but dismissed\n");
1587     scop.reset();
1588   } else {
1589     Msg = "SCoP ends here.";
1590     ++ScopFound;
1591     if (scop->getMaxLoopDepth() > 0)
1592       ++RichScopFound;
1593   }
1594
1595   if (R->isTopLevelRegion())
1596     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1597              << Msg);
1598   else
1599     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1600              << Msg);
1601 }