[ScopBuilder] Build escaping dependencies separately.
polly-mirror.git / lib/Analysis/ScopBuilder.cpp
blob bbf8f582afbb1da081100e71aba90643f4afecf8
//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <tuple>
#include <vector>
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");
bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations are,
// in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));
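/// Create the scalar accesses that model a PHI node.
///
/// The PHI in @p PHIStmt is modeled as a read of a PHI memory location, and
/// each incoming value is modeled as a write in the statement of its incoming
/// block. PHI nodes in the region's exit block (IsExitBlock) are only modeled
/// through their operands, as the PHI itself is not part of the region.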
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region (i.e., when IsExitBlock
  // is true) are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it; however, only if it is inside
  // the region. If it is not, it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the SCoP. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such uses here explicitly.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                  [<num> x %struct.descriptor_dimension] }
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  // i8* match
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}
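/// Try to determine the Fortran array descriptor accessed by @p Inst for the
/// case where the store of the malloc'ed memory into the descriptor is visible
/// in the IR. Returns the descriptor value, or nullptr if the expected pattern
/// does not match.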
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  // match: 4
  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  // [match: 3]
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  // match: 1
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory.
  // We are looking for a "store" into a struct with the type being the Fortran
  // descriptor type.
  for (auto user : MallocMem->users()) {
    // match: 5
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    // match: 5
    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());

    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}
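/// Try to determine the Fortran array descriptor accessed by @p Inst for the
/// case where the allocation is not visible in the IR and the accessed memory
/// is instead reached through a load of a bitcast of the descriptor. Returns
/// the descriptor value, or nullptr if the expected pattern does not match.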
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  // match: 3
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;
  // [match: 2]
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}
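/// Try to model @p Inst as an access to a multi-dimensional array with
/// fixed-size dimensions, using the index expressions of the GEP that computes
/// the accessed address. Returns true if an access was added, false if the
/// generic access construction should handle the instruction instead.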
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}
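/// Try to model @p Inst as an access to a multi-dimensional array with
/// parametric dimension sizes, using the delinearization results computed
/// during SCoP detection. Returns true if an access was added.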
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differ, the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}
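/// Model a memset/memcpy/memmove intrinsic as a byte-wise write access of
/// (possibly affine) length and, for memory transfers, an additional byte-wise
/// read access. Returns true if @p Inst is such an intrinsic and has been
/// handled.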
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //       isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}
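/// Model the memory effects of a call based on the mod/ref behavior of the
/// callee: read-only calls are recorded in GlobalReads and expanded once all
/// arrays are known, and calls that only access their argument pointees get
/// one access per pointer argument. Returns true if @p Inst is a call that has
/// been handled here.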
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
  // Fall through
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}
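/// Model @p Inst as a single-dimensional, possibly non-affine array access.
/// This is the fallback used when none of the more specialized access builders
/// could handle the instruction.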
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}
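/// Build the memory access for @p Inst by trying the specialized builders in
/// order (memory intrinsics, calls, fixed-size and then parametric
/// multi-dimensional accesses) and falling back to a single-dimensional
/// access.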
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}
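/// Create the ScopStmts for the region @p SR: a single statement for a
/// non-affine subregion, otherwise one statement per basic block, or several
/// per block when "polly_split_after" metadata is present. Each statement
/// records the instructions it models.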
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    scop->addScopStmt(&SR, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      int Count = 0;
      std::vector<Instruction *> Instructions;
      for (Instruction &Inst : *I->getNodeAs<BasicBlock>()) {
        Loop *L = LI.getLoopFor(Inst.getParent());
        if (shouldModelInst(&Inst, L))
          Instructions.push_back(&Inst);
        if (Inst.getMetadata("polly_split_after")) {
          Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
          scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
                            Instructions, Count);
          Count++;
          Instructions.clear();
        }
      }

      Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
      scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
                        Instructions, Count);
    }
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  int Count = 0;
  bool Split = false;
  for (Instruction &Inst : BB) {
    if (Split) {
      Split = false;
      Count++;
    }
    if (Inst.getMetadata("polly_split_after"))
      Split = true;

    if (Stmt && Stmt->isBlockStmt() && Stmt != scop->getStmtListFor(&BB)[Count])
      continue;

    PHINode *PHI = dyn_cast<PHINode>(&Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    if (isIgnoredIntrinsic(&Inst))
      continue;

    // PHI nodes have already been modeled above and TerminatorInsts that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI && (!isa<TerminatorInst>(&Inst) || NonAffineSubRegion))
      buildScalarDependences(Stmt, &Inst);
  }
}
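/// Create a new MemoryAccess and register it with @p Stmt and the SCoP.
/// MUST_WRITE accesses are downgraded to MAY_WRITE unless the access is known
/// to execute whenever the statement is executed.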
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}
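/// Make sure the value @p V is available in @p UserStmt. A scalar read access
/// is created for inter-statement uses (together with the corresponding write
/// in the defining statement) and, if requested, for read-only scalars;
/// synthesizable, hoisted, block and intra-statement uses need no access.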
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one already
    // exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
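/// Create a write access for the incoming value @p IncomingValue of @p PHI in
/// the statement of the incoming block. Multiple incoming values written by
/// the same statement are merged into a single PHI write access.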
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement, ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care that the incoming value is available in the incoming block. This
  // must be done before the check for multiple PHI writes because multiple
  // exiting edges from a subregion can each be the effective written value of
  // the subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
    isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
    Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
  }
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                     const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
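/// Detect reduction-like load/store pairs in @p Stmt. Candidate chains are
/// collected for every store, and a pair is marked as reduction-like only if
/// no other access of the statement may overlap with the memory it touches.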
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // Then check the remaining candidate pairs for overlap with other memory
    // accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
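/// Collect loads that may form a reduction chain with @p StoreMA. The stored
/// value must be a single-use, commutative and associative binary operator in
/// the same basic block, and only its single-use load operands from that block
/// qualify as candidates.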
void ScopBuilder::collectCandidateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is not in the same basic block as the store
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}
#ifndef NDEBUG
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}
/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (not corresponding to
/// the "physical" anymore, hence called "virtual"). The CodeGenerator
/// therefore has to pick up the virtual uses. But here, before any such pass
/// has run, this has not happened yet, such that virtual and physical uses
/// are equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It can
      // still be synthesizable or load-hoisted, but these kind of instructions
      // are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
#endif
/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
                            OptimizationRemarkEmitter &ORE) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));

  buildStmts(R);
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will become part of it.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  scop->buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
    DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    buildDomain(Stmt);
    collectSurroundingLoops(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  scop->buildSchedule(LI);

  scop->finalizeAccesses();

  scop->realignParams();
  scop->addUserContext();

  // After the context has been fully constructed, i.e., once all our knowledge
  // about the parameters is in it, we add all recorded assumptions to the
  // assumed/invalid context.
  scop->addRecordedAssumptions();

  scop->simplifyContexts();
  if (!scop->buildAliasChecks(AA)) {
    DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  scop->hoistInvariantLoads();
  scop->canonicalizeDynamicBasePtrs();
  scop->verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC, ORE);

  DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    DEBUG(dbgs() << "SCoP detected but dismissed\n");
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}