lib/Analysis/ScopInfo.cpp

   1 //===- ScopInfo.cpp -------------------------------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Create a polyhedral description for a static control flow region.
  11 //
  12 // The pass creates a polyhedral description of the Scops detected by the Scop
  13 // detection derived from their LLVM-IR code.
  14 //
  15 // This representation is shared among several tools in the polyhedral
  16 // community, which are e.g. Cloog, Pluto, Loopo, Graphite.
  17 //
  18 //===----------------------------------------------------------------------===//
  19
  20 #include "polly/ScopInfo.h"
  21 #include "polly/LinkAllPasses.h"
  22 #include "polly/Options.h"
  23 #include "polly/ScopBuilder.h"
  24 #include "polly/ScopDetection.h"
  25 #include "polly/Support/GICHelper.h"
  26 #include "polly/Support/ISLOStream.h"
  27 #include "polly/Support/SCEVAffinator.h"
  28 #include "polly/Support/SCEVValidator.h"
  29 #include "polly/Support/ScopHelper.h"
  30 #include "llvm/ADT/APInt.h"
  31 #include "llvm/ADT/ArrayRef.h"
  32 #include "llvm/ADT/DenseMap.h"
  33 #include "llvm/ADT/DenseSet.h"
  34 #include "llvm/ADT/PostOrderIterator.h"
  35 #include "llvm/ADT/STLExtras.h"
  36 #include "llvm/ADT/SetVector.h"
  37 #include "llvm/ADT/SmallPtrSet.h"
  38 #include "llvm/ADT/SmallSet.h"
  39 #include "llvm/ADT/SmallVector.h"
  40 #include "llvm/ADT/Statistic.h"
  41 #include "llvm/ADT/StringExtras.h"
  42 #include "llvm/ADT/StringMap.h"
  43 #include "llvm/Analysis/AliasAnalysis.h"
  44 #include "llvm/Analysis/AliasSetTracker.h"
  45 #include "llvm/Analysis/AssumptionCache.h"
  46 #include "llvm/Analysis/Loads.h"
  47 #include "llvm/Analysis/LoopInfo.h"
  48 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
  49 #include "llvm/Analysis/RegionInfo.h"
  50 #include "llvm/Analysis/RegionIterator.h"
  51 #include "llvm/Analysis/ScalarEvolution.h"
  52 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  53 #include "llvm/IR/Argument.h"
  54 #include "llvm/IR/BasicBlock.h"
  55 #include "llvm/IR/CFG.h"
  56 #include "llvm/IR/ConstantRange.h"
  57 #include "llvm/IR/Constants.h"
  58 #include "llvm/IR/DataLayout.h"
  59 #include "llvm/IR/DebugLoc.h"
  60 #include "llvm/IR/DerivedTypes.h"
  61 #include "llvm/IR/DiagnosticInfo.h"
  62 #include "llvm/IR/Dominators.h"
  63 #include "llvm/IR/Function.h"
  64 #include "llvm/IR/InstrTypes.h"
  65 #include "llvm/IR/Instruction.h"
  66 #include "llvm/IR/Instructions.h"
  67 #include "llvm/IR/IntrinsicInst.h"
  68 #include "llvm/IR/Module.h"
  69 #include "llvm/IR/PassManager.h"
  70 #include "llvm/IR/Type.h"
  71 #include "llvm/IR/Use.h"
  72 #include "llvm/IR/User.h"
  73 #include "llvm/IR/Value.h"
  74 #include "llvm/Pass.h"
  75 #include "llvm/Support/Casting.h"
  76 #include "llvm/Support/CommandLine.h"
  77 #include "llvm/Support/Compiler.h"
  78 #include "llvm/Support/Debug.h"
  79 #include "llvm/Support/ErrorHandling.h"
  80 #include "llvm/Support/MathExtras.h"
  81 #include "llvm/Support/raw_ostream.h"
  82 #include "isl/aff.h"
  83 #include "isl/constraint.h"
  84 #include "isl/local_space.h"
  85 #include "isl/map.h"
  86 #include "isl/options.h"
  87 #include "isl/printer.h"
  88 #include "isl/schedule.h"
  89 #include "isl/schedule_node.h"
  90 #include "isl/set.h"
  91 #include "isl/union_map.h"
  92 #include "isl/union_set.h"
  93 #include "isl/val.h"
  94 #include <algorithm>
  95 #include <cassert>
  96 #include <cstdlib>
  97 #include <cstring>
  98 #include <deque>
  99 #include <iterator>
 100 #include <memory>
 101 #include <string>
 102 #include <tuple>
 103 #include <utility>
 104 #include <vector>
 105
 106 using namespace llvm;
 107 using namespace polly;
 108
 109 #define DEBUG_TYPE "polly-scops"
 110
 111 STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
 112 STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
 113 STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
 114 STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
 115 STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
 116 STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
 117 STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
 118 STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
 119 STATISTIC(AssumptionsInvariantLoad,
 120           "Number of invariant loads assumptions taken.");
 121 STATISTIC(AssumptionsDelinearization,
 122           "Number of delinearization assumptions taken.");
 123
 124 STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
 125 STATISTIC(NumLoopsInScop, "Number of loops in scops");
 126 STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
 127 STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");
 128
 129 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
 130 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
 131 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
 132 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
 133 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
 134 STATISTIC(NumScopsDepthLarger,
 135           "Number of scops with maximal loop depth 6 and larger");
 136 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
 137
 138 STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
 139 STATISTIC(
 140     NumValueWritesInLoops,
 141     "Number of scalar value writes nested in affine loops after ScopInfo");
 142 STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
 143 STATISTIC(NumPHIWritesInLoops,
 144           "Number of scalar phi writes nested in affine loops after ScopInfo");
 145 STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
 146 STATISTIC(NumSingletonWritesInLoops,
 147           "Number of singleton writes nested in affine loops after ScopInfo");
 148
 149 // The maximal number of basic sets we allow during domain construction to
 150 // be created. More complex scops will result in very high compile time and
 151 // are also unlikely to result in good code
 152 static int const MaxDisjunctsInDomain = 20;
 153
 154 // The number of disjunct in the context after which we stop to add more
 155 // disjuncts. This parameter is there to avoid exponential growth in the
 156 // number of disjunct when adding non-convex sets to the context.
 157 static int const MaxDisjunctsInContext = 4;
 158
 159 // The maximal number of dimensions we allow during invariant load construction.
 160 // More complex access ranges will result in very high compile time and are also
 161 // unlikely to result in good code. This value is very high and should only
 162 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
 163 static int const MaxDimensionsInAccessRange = 9;
 164
 165 static cl::opt<int>
 166     OptComputeOut("polly-analysis-computeout",
 167                   cl::desc("Bound the scop analysis by a maximal amount of "
 168                            "computational steps (0 means no bound)"),
 169                   cl::Hidden, cl::init(800000), cl::ZeroOrMore,
 170                   cl::cat(PollyCategory));
 171
 172 static cl::opt<bool> PollyRemarksMinimal(
 173     "polly-remarks-minimal",
 174     cl::desc("Do not emit remarks about assumptions that are known"),
 175     cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
 176
 177 // Multiplicative reductions can be disabled separately as these kind of
 178 // operations can overflow easily. Additive reductions and bit operations
 179 // are in contrast pretty stable.
 180 static cl::opt<bool> DisableMultiplicativeReductions(
 181     "polly-disable-multiplicative-reductions",
 182     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
 183     cl::init(false), cl::cat(PollyCategory));
 184
 185 static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
 186     "polly-rtc-max-array-disjuncts",
 187     cl::desc("The maximal number of disjunts allowed in memory accesses to "
 188              "to build RTCs."),
 189     cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
 190
 191 static cl::opt<unsigned> RunTimeChecksMaxParameters(
 192     "polly-rtc-max-parameters",
 193     cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
 194     cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
 195
 196 static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
 197     "polly-rtc-max-arrays-per-group",
 198     cl::desc("The maximal number of arrays to compare in each alias group."),
 199     cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
 200
 201 static cl::opt<std::string> UserContextStr(
 202     "polly-context", cl::value_desc("isl parameter set"),
 203     cl::desc("Provide additional constraints on the context parameters"),
 204     cl::init(""), cl::cat(PollyCategory));
 205
 206 static cl::opt<bool> DetectReductions("polly-detect-reductions",
 207                                       cl::desc("Detect and exploit reductions"),
 208                                       cl::Hidden, cl::ZeroOrMore,
 209                                       cl::init(true), cl::cat(PollyCategory));
 210
 211 static cl::opt<bool>
 212     IslOnErrorAbort("polly-on-isl-error-abort",
 213                     cl::desc("Abort if an isl error is encountered"),
 214                     cl::init(true), cl::cat(PollyCategory));
 215
 216 static cl::opt<bool> PollyPreciseInbounds(
 217     "polly-precise-inbounds",
 218     cl::desc("Take more precise inbounds assumptions (do not scale well)"),
 219     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 220
 221 static cl::opt<bool>
 222     PollyIgnoreInbounds("polly-ignore-inbounds",
 223                         cl::desc("Do not take inbounds assumptions at all"),
 224                         cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 225
 226 static cl::opt<bool> PollyIgnoreParamBounds(
 227     "polly-ignore-parameter-bounds",
 228     cl::desc(
 229         "Do not add parameter bounds and do no gist simplify sets accordingly"),
 230     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 231
 232 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
 233     "polly-allow-dereference-of-all-function-parameters",
 234     cl::desc(
 235         "Treat all parameters to functions that are pointers as dereferencible."
 236         " This is useful for invariant load hoisting, since we can generate"
 237         " less runtime checks. This is only valid if all pointers to functions"
 238         " are always initialized, so that Polly can choose to hoist"
 239         " their loads. "),
 240     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 241
 242 static cl::opt<bool> PollyPreciseFoldAccesses(
 243     "polly-precise-fold-accesses",
 244     cl::desc("Fold memory accesses to model more possible delinearizations "
 245              "(does not scale well)"),
 246     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 247
 248 bool polly::UseInstructionNames;
 249
 250 static cl::opt<bool, true> XUseInstructionNames(
 251     "polly-use-llvm-names",
 252     cl::desc("Use LLVM-IR names when deriving statement names"),
 253     cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
 254     cl::ZeroOrMore, cl::cat(PollyCategory));
 255
 256 static cl::opt<bool> PollyPrintInstructions(
 257     "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
 258     cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));
 259
 260 //===----------------------------------------------------------------------===//
 261
 262 // Create a sequence of two schedules. Either argument may be null and is
 263 // interpreted as the empty schedule. Can also return null if both schedules are
 264 // empty.
 265 static __isl_give isl_schedule *
 266 combineInSequence(__isl_take isl_schedule *Prev,
 267                   __isl_take isl_schedule *Succ) {
 268   if (!Prev)
 269     return Succ;
 270   if (!Succ)
 271     return Prev;
 272
 273   return isl_schedule_sequence(Prev, Succ);
 274 }
 275
 276 static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
 277                                     int dim, isl::dim type) {
 278   isl::val V;
 279   isl::ctx Ctx = S.get_ctx();
 280
 281   // The upper and lower bound for a parameter value is derived either from
 282   // the data type of the parameter or from the - possibly more restrictive -
 283   // range metadata.
 284   V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
 285   S = S.lower_bound_val(type, dim, V);
 286   V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
 287   S = S.upper_bound_val(type, dim, V);
 288
 289   if (Range.isFullSet())
 290     return S;
 291
 292   if (isl_set_n_basic_set(S.get()) > MaxDisjunctsInContext)
 293     return S;
 294
 295   // In case of signed wrapping, we can refine the set of valid values by
 296   // excluding the part not covered by the wrapping range.
 297   if (Range.isSignWrappedSet()) {
 298     V = valFromAPInt(Ctx.get(), Range.getLower(), true);
 299     isl::set SLB = S.lower_bound_val(type, dim, V);
 300
 301     V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
 302     V = V.sub_ui(1);
 303     isl::set SUB = S.upper_bound_val(type, dim, V);
 304     S = SLB.unite(SUB);
 305   }
 306
 307   return S;
 308 }
 309
 310 static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
 311   LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
 312   if (!BasePtrLI)
 313     return nullptr;
 314
 315   if (!S->contains(BasePtrLI))
 316     return nullptr;
 317
 318   ScalarEvolution &SE = *S->getSE();
 319
 320   auto *OriginBaseSCEV =
 321       SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
 322   if (!OriginBaseSCEV)
 323     return nullptr;
 324
 325   auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
 326   if (!OriginBaseSCEVUnknown)
 327     return nullptr;
 328
 329   return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
 330                              MemoryKind::Array);
 331 }
 332
 333 ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx,
 334                              ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
 335                              const DataLayout &DL, Scop *S,
 336                              const char *BaseName)
 337     : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
 338   std::string BasePtrName =
 339       BaseName ? BaseName
 340                : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
 341                                       Kind == MemoryKind::PHI ? "__phi" : "",
 342                                       UseInstructionNames);
 343   Id = isl::id::alloc(Ctx, BasePtrName, this);
 344
 345   updateSizes(Sizes);
 346
 347   if (!BasePtr || Kind != MemoryKind::Array) {
 348     BasePtrOriginSAI = nullptr;
 349     return;
 350   }
 351
 352   BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
 353   if (BasePtrOriginSAI)
 354     const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
 355 }
 356
 357 ScopArrayInfo::~ScopArrayInfo() = default;
 358
 359 isl::space ScopArrayInfo::getSpace() const {
 360   auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions());
 361   Space = Space.set_tuple_id(isl::dim::set, Id);
 362   return Space;
 363 }
 364
 365 bool ScopArrayInfo::isReadOnly() {
 366   isl::union_set WriteSet = S.getWrites().range();
 367   isl::space Space = getSpace();
 368   WriteSet = WriteSet.extract_set(Space);
 369
 370   return bool(WriteSet.is_empty());
 371 }
 372
 373 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
 374   if (Array->getElementType() != getElementType())
 375     return false;
 376
 377   if (Array->getNumberOfDimensions() != getNumberOfDimensions())
 378     return false;
 379
 380   for (unsigned i = 0; i < getNumberOfDimensions(); i++)
 381     if (Array->getDimensionSize(i) != getDimensionSize(i))
 382       return false;
 383
 384   return true;
 385 }
 386
 387 void ScopArrayInfo::updateElementType(Type *NewElementType) {
 388   if (NewElementType == ElementType)
 389     return;
 390
 391   auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
 392   auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);
 393
 394   if (NewElementSize == OldElementSize || NewElementSize == 0)
 395     return;
 396
 397   if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
 398     ElementType = NewElementType;
 399   } else {
 400     auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
 401     ElementType = IntegerType::get(ElementType->getContext(), GCD);
 402   }
 403 }
 404
 405 /// Make the ScopArrayInfo model a Fortran Array
 406 void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
 407   assert(FAD && "got invalid Fortran array descriptor");
 408   if (this->FAD) {
 409     assert(this->FAD == FAD &&
 410            "receiving different array descriptors for same array");
 411     return;
 412   }
 413
 414   assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
 415   assert(!this->FAD);
 416   this->FAD = FAD;
 417
 418   isl::space Space(S.getIslCtx(), 1, 0);
 419
 420   std::string param_name = getName();
 421   param_name += "_fortranarr_size";
 422   isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this);
 423
 424   Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
 425   isl::pw_aff PwAff =
 426       isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);
 427
 428   DimensionSizesPw[0] = PwAff;
 429 }
 430
 431 bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
 432                                 bool CheckConsistency) {
 433   int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
 434   int ExtraDimsNew = NewSizes.size() - SharedDims;
 435   int ExtraDimsOld = DimensionSizes.size() - SharedDims;
 436
 437   if (CheckConsistency) {
 438     for (int i = 0; i < SharedDims; i++) {
 439       auto *NewSize = NewSizes[i + ExtraDimsNew];
 440       auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
 441       if (NewSize && KnownSize && NewSize != KnownSize)
 442         return false;
 443     }
 444
 445     if (DimensionSizes.size() >= NewSizes.size())
 446       return true;
 447   }
 448
 449   DimensionSizes.clear();
 450   DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
 451                         NewSizes.end());
 452   DimensionSizesPw.clear();
 453   for (const SCEV *Expr : DimensionSizes) {
 454     if (!Expr) {
 455       DimensionSizesPw.push_back(nullptr);
 456       continue;
 457     }
 458     isl::pw_aff Size = S.getPwAffOnly(Expr);
 459     DimensionSizesPw.push_back(Size);
 460   }
 461   return true;
 462 }
 463
 464 std::string ScopArrayInfo::getName() const { return Id.get_name(); }
 465
 466 int ScopArrayInfo::getElemSizeInBytes() const {
 467   return DL.getTypeAllocSize(ElementType);
 468 }
 469
 470 isl::id ScopArrayInfo::getBasePtrId() const { return Id; }
 471
 472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 473 LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); }
 474 #endif
 475
 476 void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
 477   OS.indent(8) << *getElementType() << " " << getName();
 478   unsigned u = 0;
 479   // If this is a Fortran array, then we can print the outermost dimension
 480   // as a isl_pw_aff even though there is no SCEV information.
 481   bool IsOutermostSizeKnown = SizeAsPwAff && FAD;
 482
 483   if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
 484       !getDimensionSize(0)) {
 485     OS << "[*]";
 486     u++;
 487   }
 488   for (; u < getNumberOfDimensions(); u++) {
 489     OS << "[";
 490
 491     if (SizeAsPwAff) {
 492       isl::pw_aff Size = getDimensionSizePw(u);
 493       OS << " " << Size << " ";
 494     } else {
 495       OS << *getDimensionSize(u);
 496     }
 497
 498     OS << "]";
 499   }
 500
 501   OS << ";";
 502
 503   if (BasePtrOriginSAI)
 504     OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";
 505
 506   OS << " // Element size " << getElemSizeInBytes() << "\n";
 507 }
 508
 509 const ScopArrayInfo *
 510 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
 511   isl::id Id = PMA.get_tuple_id(isl::dim::out);
 512   assert(!Id.is_null() && "Output dimension didn't have an ID");
 513   return getFromId(Id);
 514 }
 515
 516 const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
 517   void *User = Id.get_user();
 518   const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
 519   return SAI;
 520 }
 521
 522 void MemoryAccess::wrapConstantDimensions() {
 523   auto *SAI = getScopArrayInfo();
 524   isl::space ArraySpace = SAI->getSpace();
 525   isl::ctx Ctx = ArraySpace.get_ctx();
 526   unsigned DimsArray = SAI->getNumberOfDimensions();
 527
 528   isl::multi_aff DivModAff = isl::multi_aff::identity(
 529       ArraySpace.map_from_domain_and_range(ArraySpace));
 530   isl::local_space LArraySpace = isl::local_space(ArraySpace);
 531
 532   // Begin with last dimension, to iteratively carry into higher dimensions.
 533   for (int i = DimsArray - 1; i > 0; i--) {
 534     auto *DimSize = SAI->getDimensionSize(i);
 535     auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);
 536
 537     // This transformation is not applicable to dimensions with dynamic size.
 538     if (!DimSizeCst)
 539       continue;
 540
 541     // This transformation is not applicable to dimensions of size zero.
 542     if (DimSize->isZero())
 543       continue;
 544
 545     isl::val DimSizeVal =
 546         valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
 547     isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
 548     isl::aff PrevVar =
 549         isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);
 550
 551     // Compute: index % size
 552     // Modulo must apply in the divide of the previous iteration, if any.
 553     isl::aff Modulo = Var.mod(DimSizeVal);
 554     Modulo = Modulo.pullback(DivModAff);
 555
 556     // Compute: floor(index / size)
 557     isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
 558     Divide = Divide.floor();
 559     Divide = Divide.add(PrevVar);
 560     Divide = Divide.pullback(DivModAff);
 561
 562     // Apply Modulo and Divide.
 563     DivModAff = DivModAff.set_aff(i, Modulo);
 564     DivModAff = DivModAff.set_aff(i - 1, Divide);
 565   }
 566
 567   // Apply all modulo/divides on the accesses.
 568   isl::map Relation = AccessRelation;
 569   Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
 570   Relation = Relation.detect_equalities();
 571   AccessRelation = Relation;
 572 }
 573
 574 void MemoryAccess::updateDimensionality() {
 575   auto *SAI = getScopArrayInfo();
 576   isl::space ArraySpace = SAI->getSpace();
 577   isl::space AccessSpace = AccessRelation.get_space().range();
 578   isl::ctx Ctx = ArraySpace.get_ctx();
 579
 580   auto DimsArray = ArraySpace.dim(isl::dim::set);
 581   auto DimsAccess = AccessSpace.dim(isl::dim::set);
 582   auto DimsMissing = DimsArray - DimsAccess;
 583
 584   auto *BB = getStatement()->getEntryBlock();
 585   auto &DL = BB->getModule()->getDataLayout();
 586   unsigned ArrayElemSize = SAI->getElemSizeInBytes();
 587   unsigned ElemBytes = DL.getTypeAllocSize(getElementType());
 588
 589   isl::map Map = isl::map::from_domain_and_range(
 590       isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));
 591
 592   for (unsigned i = 0; i < DimsMissing; i++)
 593     Map = Map.fix_si(isl::dim::out, i, 0);
 594
 595   for (unsigned i = DimsMissing; i < DimsArray; i++)
 596     Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);
 597
 598   AccessRelation = AccessRelation.apply_range(Map);
 599
 600   // For the non delinearized arrays, divide the access function of the last
 601   // subscript by the size of the elements in the array.
 602   //
 603   // A stride one array access in C expressed as A[i] is expressed in
 604   // LLVM-IR as something like A[i * elementsize]. This hides the fact that
 605   // two subsequent values of 'i' index two values that are stored next to
 606   // each other in memory. By this division we make this characteristic
 607   // obvious again. If the base pointer was accessed with offsets not divisible
 608   // by the accesses element size, we will have chosen a smaller ArrayElemSize
 609   // that divides the offsets of all accesses to this base pointer.
 610   if (DimsAccess == 1) {
 611     isl::val V = isl::val(Ctx, ArrayElemSize);
 612     AccessRelation = AccessRelation.floordiv_val(V);
 613   }
 614
 615   // We currently do this only if we added at least one dimension, which means
 616   // some dimension's indices have not been specified, an indicator that some
 617   // index values have been added together.
 618   // TODO: Investigate general usefulness; Effect on unit tests is to make index
 619   // expressions more complicated.
 620   if (DimsMissing)
 621     wrapConstantDimensions();
 622
 623   if (!isAffine())
 624     computeBoundsOnAccessRelation(ArrayElemSize);
 625
 626   // Introduce multi-element accesses in case the type loaded by this memory
 627   // access is larger than the canonical element type of the array.
 628   //
 629   // An access ((float *)A)[i] to an array char *A is modeled as
 630   // {[i] -> A[o] : 4 i <= o <= 4 i + 3
 631   if (ElemBytes > ArrayElemSize) {
 632     assert(ElemBytes % ArrayElemSize == 0 &&
 633            "Loaded element size should be multiple of canonical element size");
 634     isl::map Map = isl::map::from_domain_and_range(
 635         isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
 636     for (unsigned i = 0; i < DimsArray - 1; i++)
 637       Map = Map.equate(isl::dim::in, i, isl::dim::out, i);
 638
 639     isl::constraint C;
 640     isl::local_space LS;
 641
 642     LS = isl::local_space(Map.get_space());
 643     int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();
 644
 645     C = isl::constraint::alloc_inequality(LS);
 646     C = C.set_constant_val(isl::val(Ctx, Num - 1));
 647     C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
 648     C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
 649     Map = Map.add_constraint(C);
 650
 651     C = isl::constraint::alloc_inequality(LS);
 652     C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
 653     C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
 654     C = C.set_constant_val(isl::val(Ctx, 0));
 655     Map = Map.add_constraint(C);
 656     AccessRelation = AccessRelation.apply_range(Map);
 657   }
 658 }
 659
 660 const std::string
 661 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
 662   switch (RT) {
 663   case MemoryAccess::RT_NONE:
 664     llvm_unreachable("Requested a reduction operator string for a memory "
 665                      "access which isn't a reduction");
 666   case MemoryAccess::RT_ADD:
 667     return "+";
 668   case MemoryAccess::RT_MUL:
 669     return "*";
 670   case MemoryAccess::RT_BOR:
 671     return "|";
 672   case MemoryAccess::RT_BXOR:
 673     return "^";
 674   case MemoryAccess::RT_BAND:
 675     return "&";
 676   }
 677   llvm_unreachable("Unknown reduction type");
 678 }
 679
 680 /// Return the reduction type for a given binary operator.
 681 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
 682                                                     const Instruction *Load) {
 683   if (!BinOp)
 684     return MemoryAccess::RT_NONE;
 685   switch (BinOp->getOpcode()) {
 686   case Instruction::FAdd:
 687     if (!BinOp->hasUnsafeAlgebra())
 688       return MemoryAccess::RT_NONE;
 689   // Fall through
 690   case Instruction::Add:
 691     return MemoryAccess::RT_ADD;
 692   case Instruction::Or:
 693     return MemoryAccess::RT_BOR;
 694   case Instruction::Xor:
 695     return MemoryAccess::RT_BXOR;
 696   case Instruction::And:
 697     return MemoryAccess::RT_BAND;
 698   case Instruction::FMul:
 699     if (!BinOp->hasUnsafeAlgebra())
 700       return MemoryAccess::RT_NONE;
 701   // Fall through
 702   case Instruction::Mul:
 703     if (DisableMultiplicativeReductions)
 704       return MemoryAccess::RT_NONE;
 705     return MemoryAccess::RT_MUL;
 706   default:
 707     return MemoryAccess::RT_NONE;
 708   }
 709 }
 710
 711 const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
 712   isl::id ArrayId = getArrayId();
 713   void *User = ArrayId.get_user();
 714   const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
 715   return SAI;
 716 }
 717
 718 const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
 719   isl::id ArrayId = getLatestArrayId();
 720   void *User = ArrayId.get_user();
 721   const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
 722   return SAI;
 723 }
 724
 725 isl::id MemoryAccess::getOriginalArrayId() const {
 726   return AccessRelation.get_tuple_id(isl::dim::out);
 727 }
 728
 729 isl::id MemoryAccess::getLatestArrayId() const {
 730   if (!hasNewAccessRelation())
 731     return getOriginalArrayId();
 732   return NewAccessRelation.get_tuple_id(isl::dim::out);
 733 }
 734
 735 isl::map MemoryAccess::getAddressFunction() const {
 736   return getAccessRelation().lexmin();
 737 }
 738
 739 isl::pw_multi_aff
 740 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
 741   isl::map Schedule, ScheduledAccRel;
 742   isl::union_set UDomain;
 743
 744   UDomain = getStatement()->getDomain();
 745   USchedule = USchedule.intersect_domain(UDomain);
 746   Schedule = isl::map::from_union_map(USchedule);
 747   ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
 748   return isl::pw_multi_aff::from_map(ScheduledAccRel);
 749 }
 750
 751 isl::map MemoryAccess::getOriginalAccessRelation() const {
 752   return AccessRelation;
 753 }
 754
 755 std::string MemoryAccess::getOriginalAccessRelationStr() const {
 756   return stringFromIslObj(AccessRelation.get());
 757 }
 758
 759 isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
 760   return AccessRelation.get_space();
 761 }
 762
 763 isl::map MemoryAccess::getNewAccessRelation() const {
 764   return NewAccessRelation;
 765 }
 766
 767 std::string MemoryAccess::getNewAccessRelationStr() const {
 768   return stringFromIslObj(NewAccessRelation.get());
 769 }
 770
 771 std::string MemoryAccess::getAccessRelationStr() const {
 772   return getAccessRelation().to_str();
 773 }
 774
 775 isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
 776   isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
 777   Space = Space.align_params(Statement->getDomainSpace());
 778
 779   return isl::basic_map::from_domain_and_range(
 780       isl::basic_set::universe(Statement->getDomainSpace()),
 781       isl::basic_set::universe(Space));
 782 }
 783
 784 // Formalize no out-of-bound access assumption
 785 //
 786 // When delinearizing array accesses we optimistically assume that the
 787 // delinearized accesses do not access out of bound locations (the subscript
 788 // expression of each array evaluates for each statement instance that is
 789 // executed to a value that is larger than zero and strictly smaller than the
 790 // size of the corresponding dimension). The only exception is the outermost
 791 // dimension for which we do not need to assume any upper bound.  At this point
 792 // we formalize this assumption to ensure that at code generation time the
 793 // relevant run-time checks can be generated.
 794 //
 795 // To find the set of constraints necessary to avoid out of bound accesses, we
 796 // first build the set of data locations that are not within array bounds. We
 797 // then apply the reverse access relation to obtain the set of iterations that
 798 // may contain invalid accesses and reduce this set of iterations to the ones
 799 // that are actually executed by intersecting them with the domain of the
 800 // statement. If we now project out all loop dimensions, we obtain a set of
 801 // parameters that may cause statement instances to be executed that may
 802 // possibly yield out of bound memory accesses. The complement of these
 803 // constraints is the set of constraints that needs to be assumed to ensure such
 804 // statement instances are never executed.
 805 void MemoryAccess::assumeNoOutOfBound() {
 806   if (PollyIgnoreInbounds)
 807     return;
 808   auto *SAI = getScopArrayInfo();
 809   isl::space Space = getOriginalAccessRelationSpace().range();
 810   isl::set Outside = isl::set::empty(Space);
 811   for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
 812     isl::local_space LS(Space);
 813     isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
 814     isl::pw_aff Zero = isl::pw_aff(LS);
 815
 816     isl::set DimOutside = Var.lt_set(Zero);
 817     isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
 818     SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
 819     SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
 820     DimOutside = DimOutside.unite(SizeE.le_set(Var));
 821
 822     Outside = Outside.unite(DimOutside);
 823   }
 824
 825   Outside = Outside.apply(getAccessRelation().reverse());
 826   Outside = Outside.intersect(Statement->getDomain());
 827   Outside = Outside.params();
 828
 829   // Remove divs to avoid the construction of overly complicated assumptions.
 830   // Doing so increases the set of parameter combinations that are assumed to
 831   // not appear. This is always save, but may make the resulting run-time check
 832   // bail out more often than strictly necessary.
 833   Outside = Outside.remove_divs();
 834   Outside = Outside.complement();
 835   const auto &Loc = getAccessInstruction()
 836                         ? getAccessInstruction()->getDebugLoc()
 837                         : DebugLoc();
 838   if (!PollyPreciseInbounds)
 839     Outside = Outside.gist_params(Statement->getDomain().params());
 840   Statement->getParent()->recordAssumption(INBOUNDS, Outside.release(), Loc,
 841                                            AS_ASSUMPTION);
 842 }
 843
 844 void MemoryAccess::buildMemIntrinsicAccessRelation() {
 845   assert(isMemoryIntrinsic());
 846   assert(Subscripts.size() == 2 && Sizes.size() == 1);
 847
 848   isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
 849   isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);
 850
 851   isl::map LengthMap;
 852   if (Subscripts[1] == nullptr) {
 853     LengthMap = isl::map::universe(SubscriptMap.get_space());
 854   } else {
 855     isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
 856     LengthMap = isl::map::from_pw_aff(LengthPWA);
 857     isl::space RangeSpace = LengthMap.get_space().range();
 858     LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
 859   }
 860   LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
 861   LengthMap = LengthMap.align_params(SubscriptMap.get_space());
 862   SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
 863   LengthMap = LengthMap.sum(SubscriptMap);
 864   AccessRelation =
 865       LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
 866 }
 867
 868 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
 869   ScalarEvolution *SE = Statement->getParent()->getSE();
 870
 871   auto MAI = MemAccInst(getAccessInstruction());
 872   if (isa<MemIntrinsic>(MAI))
 873     return;
 874
 875   Value *Ptr = MAI.getPointerOperand();
 876   if (!Ptr || !SE->isSCEVable(Ptr->getType()))
 877     return;
 878
 879   auto *PtrSCEV = SE->getSCEV(Ptr);
 880   if (isa<SCEVCouldNotCompute>(PtrSCEV))
 881     return;
 882
 883   auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
 884   if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
 885     PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);
 886
 887   const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
 888   if (Range.isFullSet())
 889     return;
 890
 891   if (Range.isWrappedSet() || Range.isSignWrappedSet())
 892     return;
 893
 894   bool isWrapping = Range.isSignWrappedSet();
 895
 896   unsigned BW = Range.getBitWidth();
 897   const auto One = APInt(BW, 1);
 898   const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
 899   const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();
 900
 901   auto Min = LB.sdiv(APInt(BW, ElementSize));
 902   auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
 903
 904   assert(Min.sle(Max) && "Minimum expected to be less or equal than max");
 905
 906   isl::map Relation = AccessRelation;
 907   isl::set AccessRange = Relation.range();
 908   AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
 909                                     isl::dim::set);
 910   AccessRelation = Relation.intersect_range(AccessRange);
 911 }
 912
 913 void MemoryAccess::foldAccessRelation() {
 914   if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
 915     return;
 916
 917   int Size = Subscripts.size();
 918
 919   isl::map NewAccessRelation = AccessRelation;
 920
 921   for (int i = Size - 2; i >= 0; --i) {
 922     isl::space Space;
 923     isl::map MapOne, MapTwo;
 924     isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);
 925
 926     isl::space SpaceSize = DimSize.get_space();
 927     isl::id ParamId =
 928         give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));
 929
 930     Space = AccessRelation.get_space();
 931     Space = Space.range().map_from_set();
 932     Space = Space.align_params(SpaceSize);
 933
 934     int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);
 935
 936     MapOne = isl::map::universe(Space);
 937     for (int j = 0; j < Size; ++j)
 938       MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
 939     MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);
 940
 941     MapTwo = isl::map::universe(Space);
 942     for (int j = 0; j < Size; ++j)
 943       if (j < i || j > i + 1)
 944         MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);
 945
 946     isl::local_space LS(Space);
 947     isl::constraint C;
 948     C = isl::constraint::alloc_equality(LS);
 949     C = C.set_constant_si(-1);
 950     C = C.set_coefficient_si(isl::dim::in, i, 1);
 951     C = C.set_coefficient_si(isl::dim::out, i, -1);
 952     MapTwo = MapTwo.add_constraint(C);
 953     C = isl::constraint::alloc_equality(LS);
 954     C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
 955     C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
 956     C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
 957     MapTwo = MapTwo.add_constraint(C);
 958     MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);
 959
 960     MapOne = MapOne.unite(MapTwo);
 961     NewAccessRelation = NewAccessRelation.apply_range(MapOne);
 962   }
 963
 964   isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
 965   isl::space Space = Statement->getDomainSpace();
 966   NewAccessRelation = NewAccessRelation.set_tuple_id(
 967       isl::dim::in, Space.get_tuple_id(isl::dim::set));
 968   NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
 969   NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());
 970
 971   // Access dimension folding might in certain cases increase the number of
 972   // disjuncts in the memory access, which can possibly complicate the generated
 973   // run-time checks and can lead to costly compilation.
 974   if (!PollyPreciseFoldAccesses &&
 975       isl_map_n_basic_map(NewAccessRelation.get()) >
 976           isl_map_n_basic_map(AccessRelation.get())) {
 977   } else {
 978     AccessRelation = NewAccessRelation;
 979   }
 980 }
 981
 982 /// Check if @p Expr is divisible by @p Size.
 983 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
 984   assert(Size != 0);
 985   if (Size == 1)
 986     return true;
 987
 988   // Only one factor needs to be divisible.
 989   if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
 990     for (auto *FactorExpr : MulExpr->operands())
 991       if (isDivisible(FactorExpr, Size, SE))
 992         return true;
 993     return false;
 994   }
 995
 996   // For other n-ary expressions (Add, AddRec, Max,...) all operands need
 997   // to be divisible.
 998   if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
 999     for (auto *OpExpr : NAryExpr->operands())
1000       if (!isDivisible(OpExpr, Size, SE))
1001         return false;
1002     return true;
1003   }
1004
1005   auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
1006   auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
1007   auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
1008   return MulSCEV == Expr;
1009 }
1010
1011 void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
1012   assert(AccessRelation.is_null() && "AccessRelation already built");
1013
1014   // Initialize the invalid domain which describes all iterations for which the
1015   // access relation is not modeled correctly.
1016   isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
1017   InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());
1018
1019   isl::ctx Ctx = Id.get_ctx();
1020   isl::id BaseAddrId = SAI->getBasePtrId();
1021
1022   if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
1023     buildMemIntrinsicAccessRelation();
1024     AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
1025     return;
1026   }
1027
1028   if (!isAffine()) {
1029     // We overapproximate non-affine accesses with a possible access to the
1030     // whole array. For read accesses it does not make a difference, if an
1031     // access must or may happen. However, for write accesses it is important to
1032     // differentiate between writes that must happen and writes that may happen.
1033     if (AccessRelation.is_null())
1034       AccessRelation = createBasicAccessMap(Statement);
1035
1036     AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
1037     return;
1038   }
1039
1040   isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
1041   AccessRelation = isl::map::universe(Space);
1042
1043   for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
1044     isl::pw_aff Affine = getPwAff(Subscripts[i]);
1045     isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
1046     AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
1047   }
1048
1049   Space = Statement->getDomainSpace();
1050   AccessRelation = AccessRelation.set_tuple_id(
1051       isl::dim::in, Space.get_tuple_id(isl::dim::set));
1052   AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
1053
1054   AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
1055 }
1056
1057 MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
1058                            AccessType AccType, Value *BaseAddress,
1059                            Type *ElementType, bool Affine,
1060                            ArrayRef<const SCEV *> Subscripts,
1061                            ArrayRef<const SCEV *> Sizes, Value *AccessValue,
1062                            MemoryKind Kind)
1063     : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
1064       BaseAddr(BaseAddress), ElementType(ElementType),
1065       Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
1066       AccessValue(AccessValue), IsAffine(Affine),
1067       Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
1068       NewAccessRelation(nullptr), FAD(nullptr) {
1069   static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
1070   const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());
1071
1072   std::string IdName = Stmt->getBaseName() + Access;
1073   Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
1074 }
1075
1076 MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
1077     : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
1078       InvalidDomain(nullptr), AccessRelation(nullptr),
1079       NewAccessRelation(AccRel), FAD(nullptr) {
1080   isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
1081   auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
1082   Sizes.push_back(nullptr);
1083   for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
1084     Sizes.push_back(SAI->getDimensionSize(i));
1085   ElementType = SAI->getElementType();
1086   BaseAddr = SAI->getBasePtr();
1087   static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
1088   const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());
1089
1090   std::string IdName = Stmt->getBaseName() + Access;
1091   Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
1092 }
1093
1094 MemoryAccess::~MemoryAccess() = default;
1095
1096 void MemoryAccess::realignParams() {
1097   isl::set Ctx = Statement->getParent()->getContext();
1098   InvalidDomain = InvalidDomain.gist_params(Ctx);
1099   AccessRelation = AccessRelation.gist_params(Ctx);
1100 }
1101
1102 const std::string MemoryAccess::getReductionOperatorStr() const {
1103   return MemoryAccess::getReductionOperatorStr(getReductionType());
1104 }
1105
1106 isl::id MemoryAccess::getId() const { return Id; }
1107
1108 raw_ostream &polly::operator<<(raw_ostream &OS,
1109                                MemoryAccess::ReductionType RT) {
1110   if (RT == MemoryAccess::RT_NONE)
1111     OS << "NONE";
1112   else
1113     OS << MemoryAccess::getReductionOperatorStr(RT);
1114   return OS;
1115 }
1116
1117 void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
1118
1119 void MemoryAccess::print(raw_ostream &OS) const {
1120   switch (AccType) {
1121   case READ:
1122     OS.indent(12) << "ReadAccess :=\t";
1123     break;
1124   case MUST_WRITE:
1125     OS.indent(12) << "MustWriteAccess :=\t";
1126     break;
1127   case MAY_WRITE:
1128     OS.indent(12) << "MayWriteAccess :=\t";
1129     break;
1130   }
1131
1132   OS << "[Reduction Type: " << getReductionType() << "] ";
1133
1134   if (FAD) {
1135     OS << "[Fortran array descriptor: " << FAD->getName();
1136     OS << "] ";
1137   };
1138
1139   OS << "[Scalar: " << isScalarKind() << "]\n";
1140   OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
1141   if (hasNewAccessRelation())
1142     OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1143 }
1144
1145 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1146 LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
1147 #endif
1148
1149 isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
1150   auto *Stmt = getStatement();
1151   PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
1152   isl::set StmtDom = getStatement()->getDomain();
1153   StmtDom = StmtDom.reset_tuple_id();
1154   isl::set NewInvalidDom = StmtDom.intersect(isl::manage(PWAC.second));
1155   InvalidDomain = InvalidDomain.unite(NewInvalidDom);
1156   return isl::manage(PWAC.first);
1157 }
1158
1159 // Create a map in the size of the provided set domain, that maps from the
1160 // one element of the provided set domain to another element of the provided
1161 // set domain.
1162 // The mapping is limited to all points that are equal in all but the last
1163 // dimension and for which the last dimension of the input is strict smaller
1164 // than the last dimension of the output.
1165 //
1166 //   getEqualAndLarger(set[i0, i1, ..., iX]):
1167 //
1168 //   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1169 //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1170 //
1171 static isl::map getEqualAndLarger(isl::space SetDomain) {
1172   isl::space Space = SetDomain.map_from_set();
1173   isl::map Map = isl::map::universe(Space);
1174   unsigned lastDimension = Map.dim(isl::dim::in) - 1;
1175
1176   // Set all but the last dimension to be equal for the input and output
1177   //
1178   //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1179   //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1180   for (unsigned i = 0; i < lastDimension; ++i)
1181     Map = Map.equate(isl::dim::in, i, isl::dim::out, i);
1182
1183   // Set the last dimension of the input to be strict smaller than the
1184   // last dimension of the output.
1185   //
1186   //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1187   Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
1188   return Map;
1189 }
1190
1191 isl::set MemoryAccess::getStride(isl::map Schedule) const {
1192   isl::map AccessRelation = getAccessRelation();
1193   isl::space Space = Schedule.get_space().range();
1194   isl::map NextScatt = getEqualAndLarger(Space);
1195
1196   Schedule = Schedule.reverse();
1197   NextScatt = NextScatt.lexmin();
1198
1199   NextScatt = NextScatt.apply_range(Schedule);
1200   NextScatt = NextScatt.apply_range(AccessRelation);
1201   NextScatt = NextScatt.apply_domain(Schedule);
1202   NextScatt = NextScatt.apply_domain(AccessRelation);
1203
1204   isl::set Deltas = NextScatt.deltas();
1205   return Deltas;
1206 }
1207
1208 bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
1209   isl::set Stride, StrideX;
1210   bool IsStrideX;
1211
1212   Stride = getStride(Schedule);
1213   StrideX = isl::set::universe(Stride.get_space());
1214   for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
1215     StrideX = StrideX.fix_si(isl::dim::set, i, 0);
1216   StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
1217                            StrideWidth);
1218   IsStrideX = Stride.is_subset(StrideX);
1219
1220   return IsStrideX;
1221 }
1222
1223 bool MemoryAccess::isStrideZero(isl::map Schedule) const {
1224   return isStrideX(Schedule, 0);
1225 }
1226
1227 bool MemoryAccess::isStrideOne(isl::map Schedule) const {
1228   return isStrideX(Schedule, 1);
1229 }
1230
1231 void MemoryAccess::setAccessRelation(isl::map NewAccess) {
1232   AccessRelation = NewAccess;
1233 }
1234
1235 void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
1236   assert(NewAccess);
1237
1238 #ifndef NDEBUG
1239   // Check domain space compatibility.
1240   isl::space NewSpace = NewAccess.get_space();
1241   isl::space NewDomainSpace = NewSpace.domain();
1242   isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
1243   assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));
1244
1245   // Reads must be executed unconditionally. Writes might be executed in a
1246   // subdomain only.
1247   if (isRead()) {
1248     // Check whether there is an access for every statement instance.
1249     isl::set StmtDomain = getStatement()->getDomain();
1250     StmtDomain =
1251         StmtDomain.intersect_params(getStatement()->getParent()->getContext());
1252     isl::set NewDomain = NewAccess.domain();
1253     assert(StmtDomain.is_subset(NewDomain) &&
1254            "Partial READ accesses not supported");
1255   }
1256
1257   isl::space NewAccessSpace = NewAccess.get_space();
1258   assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
1259          "Must specify the array that is accessed");
1260   isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
1261   auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
1262   assert(SAI && "Must set a ScopArrayInfo");
1263
1264   if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
1265     InvariantEquivClassTy *EqClass =
1266         getStatement()->getParent()->lookupInvariantEquivClass(
1267             SAI->getBasePtr());
1268     assert(EqClass &&
1269            "Access functions to indirect arrays must have an invariant and "
1270            "hoisted base pointer");
1271   }
1272
1273   // Check whether access dimensions correspond to number of dimensions of the
1274   // accesses array.
1275   auto Dims = SAI->getNumberOfDimensions();
1276   assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
1277          "Access dims must match array dims");
1278 #endif
1279
1280   NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
1281   NewAccessRelation = NewAccess;
1282 }
1283
1284 bool MemoryAccess::isLatestPartialAccess() const {
1285   isl::set StmtDom = getStatement()->getDomain();
1286   isl::set AccDom = getLatestAccessRelation().domain();
1287
1288   return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
1289 }
1290
1291 //===----------------------------------------------------------------------===//
1292
1293 isl::map ScopStmt::getSchedule() const {
1294   isl::set Domain = getDomain();
1295   if (Domain.is_empty())
1296     return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1297   auto Schedule = getParent()->getSchedule();
1298   if (!Schedule)
1299     return nullptr;
1300   Schedule = Schedule.intersect_domain(isl::union_set(Domain));
1301   if (Schedule.is_empty())
1302     return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1303   isl::map M = M.from_union_map(Schedule);
1304   M = M.coalesce();
1305   M = M.gist_domain(Domain);
1306   M = M.coalesce();
1307   return M;
1308 }
1309
1310 void ScopStmt::restrictDomain(isl::set NewDomain) {
1311   assert(NewDomain.is_subset(Domain) &&
1312          "New domain is not a subset of old domain!");
1313   Domain = NewDomain;
1314 }
1315
1316 void ScopStmt::buildAccessRelations() {
1317   Scop &S = *getParent();
1318   for (MemoryAccess *Access : MemAccs) {
1319     Type *ElementType = Access->getElementType();
1320
1321     MemoryKind Ty;
1322     if (Access->isPHIKind())
1323       Ty = MemoryKind::PHI;
1324     else if (Access->isExitPHIKind())
1325       Ty = MemoryKind::ExitPHI;
1326     else if (Access->isValueKind())
1327       Ty = MemoryKind::Value;
1328     else
1329       Ty = MemoryKind::Array;
1330
1331     auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1332                                            ElementType, Access->Sizes, Ty);
1333     Access->buildAccessRelation(SAI);
1334     S.addAccessData(Access);
1335   }
1336 }
1337
1338 void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
1339   Instruction *AccessInst = Access->getAccessInstruction();
1340
1341   if (Access->isArrayKind()) {
1342     MemoryAccessList &MAL = InstructionToAccess[AccessInst];
1343     MAL.emplace_front(Access);
1344   } else if (Access->isValueKind() && Access->isWrite()) {
1345     Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
1346     assert(!ValueWrites.lookup(AccessVal));
1347
1348     ValueWrites[AccessVal] = Access;
1349   } else if (Access->isValueKind() && Access->isRead()) {
1350     Value *AccessVal = Access->getAccessValue();
1351     assert(!ValueReads.lookup(AccessVal));
1352
1353     ValueReads[AccessVal] = Access;
1354   } else if (Access->isAnyPHIKind() && Access->isWrite()) {
1355     PHINode *PHI = cast<PHINode>(Access->getAccessValue());
1356     assert(!PHIWrites.lookup(PHI));
1357
1358     PHIWrites[PHI] = Access;
1359   } else if (Access->isAnyPHIKind() && Access->isRead()) {
1360     PHINode *PHI = cast<PHINode>(Access->getAccessValue());
1361     assert(!PHIReads.lookup(PHI));
1362
1363     PHIReads[PHI] = Access;
1364   }
1365
1366   if (Prepend) {
1367     MemAccs.insert(MemAccs.begin(), Access);
1368     return;
1369   }
1370   MemAccs.push_back(Access);
1371 }
1372
1373 void ScopStmt::realignParams() {
1374   for (MemoryAccess *MA : *this)
1375     MA->realignParams();
1376
1377   isl::set Ctx = Parent.getContext();
1378   InvalidDomain = InvalidDomain.gist_params(Ctx);
1379   Domain = Domain.gist_params(Ctx);
1380 }
1381
1382 /// Add @p BSet to the set @p User if @p BSet is bounded.
1383 static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
1384                                     void *User) {
1385   isl_set **BoundedParts = static_cast<isl_set **>(User);
1386   if (isl_basic_set_is_bounded(BSet))
1387     *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
1388   else
1389     isl_basic_set_free(BSet);
1390   return isl_stat_ok;
1391 }
1392
1393 /// Return the bounded parts of @p S.
1394 static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
1395   isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
1396   isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
1397   isl_set_free(S);
1398   return BoundedParts;
1399 }
1400
1401 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1402 ///
1403 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1404 ///          both with regards to the dimension @p Dim.
1405 static std::pair<__isl_give isl_set *, __isl_give isl_set *>
1406 partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
1407   for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
1408     S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);
1409
1410   unsigned NumDimsS = isl_set_n_dim(S);
1411   isl_set *OnlyDimS = isl_set_copy(S);
1412
1413   // Remove dimensions that are greater than Dim as they are not interesting.
1414   assert(NumDimsS >= Dim + 1);
1415   OnlyDimS =
1416       isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);
1417
1418   // Create artificial parametric upper bounds for dimensions smaller than Dim
1419   // as we are not interested in them.
1420   OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
1421   for (unsigned u = 0; u < Dim; u++) {
1422     isl_constraint *C = isl_inequality_alloc(
1423         isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
1424     C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
1425     C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
1426     OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
1427   }
1428
1429   // Collect all bounded parts of OnlyDimS.
1430   isl_set *BoundedParts = collectBoundedParts(OnlyDimS);
1431
1432   // Create the dimensions greater than Dim again.
1433   BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
1434                                      NumDimsS - Dim - 1);
1435
1436   // Remove the artificial upper bound parameters again.
1437   BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);
1438
1439   isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
1440   return std::make_pair(UnboundedParts, BoundedParts);
1441 }
1442
1443 /// Set the dimension Ids from @p From in @p To.
1444 static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
1445                                            __isl_take isl_set *To) {
1446   for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
1447     isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
1448     To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
1449   }
1450   return To;
1451 }
1452
1453 /// Create the conditions under which @p L @p Pred @p R is true.
1454 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1455                                              __isl_take isl_pw_aff *L,
1456                                              __isl_take isl_pw_aff *R) {
1457   switch (Pred) {
1458   case ICmpInst::ICMP_EQ:
1459     return isl_pw_aff_eq_set(L, R);
1460   case ICmpInst::ICMP_NE:
1461     return isl_pw_aff_ne_set(L, R);
1462   case ICmpInst::ICMP_SLT:
1463     return isl_pw_aff_lt_set(L, R);
1464   case ICmpInst::ICMP_SLE:
1465     return isl_pw_aff_le_set(L, R);
1466   case ICmpInst::ICMP_SGT:
1467     return isl_pw_aff_gt_set(L, R);
1468   case ICmpInst::ICMP_SGE:
1469     return isl_pw_aff_ge_set(L, R);
1470   case ICmpInst::ICMP_ULT:
1471     return isl_pw_aff_lt_set(L, R);
1472   case ICmpInst::ICMP_UGT:
1473     return isl_pw_aff_gt_set(L, R);
1474   case ICmpInst::ICMP_ULE:
1475     return isl_pw_aff_le_set(L, R);
1476   case ICmpInst::ICMP_UGE:
1477     return isl_pw_aff_ge_set(L, R);
1478   default:
1479     llvm_unreachable("Non integer predicate not supported");
1480   }
1481 }
1482
1483 /// Create the conditions under which @p L @p Pred @p R is true.
1484 ///
1485 /// Helper function that will make sure the dimensions of the result have the
1486 /// same isl_id's as the @p Domain.
1487 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1488                                              __isl_take isl_pw_aff *L,
1489                                              __isl_take isl_pw_aff *R,
1490                                              __isl_keep isl_set *Domain) {
1491   isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
1492   return setDimensionIds(Domain, ConsequenceCondSet);
1493 }
1494
1495 /// Compute the isl representation for the SCEV @p E in this BB.
1496 ///
1497 /// @param S                The Scop in which @p BB resides in.
1498 /// @param BB               The BB for which isl representation is to be
1499 /// computed.
1500 /// @param InvalidDomainMap A map of BB to their invalid domains.
1501 /// @param E                The SCEV that should be translated.
1502 /// @param NonNegative      Flag to indicate the @p E has to be non-negative.
1503 ///
1504 /// Note that this function will also adjust the invalid context accordingly.
1505
1506 __isl_give isl_pw_aff *
1507 getPwAff(Scop &S, BasicBlock *BB,
1508          DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
1509          bool NonNegative = false) {
1510   PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
1511   InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(isl::manage(PWAC.second));
1512   return PWAC.first;
1513 }
1514
1515 /// Build the conditions sets for the switch @p SI in the @p Domain.
1516 ///
1517 /// This will fill @p ConditionSets with the conditions under which control
1518 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1519 /// have as many elements as @p SI has successors.
1520 static bool
1521 buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
1522                    __isl_keep isl_set *Domain,
1523                    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1524                    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1525   Value *Condition = getConditionFromTerminator(SI);
1526   assert(Condition && "No condition for switch");
1527
1528   ScalarEvolution &SE = *S.getSE();
1529   isl_pw_aff *LHS, *RHS;
1530   LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
1531
1532   unsigned NumSuccessors = SI->getNumSuccessors();
1533   ConditionSets.resize(NumSuccessors);
1534   for (auto &Case : SI->cases()) {
1535     unsigned Idx = Case.getSuccessorIndex();
1536     ConstantInt *CaseValue = Case.getCaseValue();
1537
1538     RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
1539     isl_set *CaseConditionSet =
1540         buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
1541     ConditionSets[Idx] = isl_set_coalesce(
1542         isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
1543   }
1544
1545   assert(ConditionSets[0] == nullptr && "Default condition set was set");
1546   isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
1547   for (unsigned u = 2; u < NumSuccessors; u++)
1548     ConditionSetUnion =
1549         isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
1550   ConditionSets[0] = setDimensionIds(
1551       Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));
1552
1553   isl_pw_aff_free(LHS);
1554
1555   return true;
1556 }
1557
1558 /// Build condition sets for unsigned ICmpInst(s).
1559 /// Special handling is required for unsigned operands to ensure that if
1560 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1561 /// it should wrap around.
1562 ///
1563 /// @param IsStrictUpperBound holds information on the predicate relation
1564 /// between TestVal and UpperBound, i.e,
1565 /// TestVal < UpperBound  OR  TestVal <= UpperBound
1566 static __isl_give isl_set *
1567 buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
1568                            __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
1569                            const SCEV *SCEV_UpperBound,
1570                            DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1571                            bool IsStrictUpperBound) {
1572   // Do not take NonNeg assumption on TestVal
1573   // as it might have MSB (Sign bit) set.
1574   isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
1575   // Take NonNeg assumption on UpperBound.
1576   isl_pw_aff *UpperBound =
1577       getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);
1578
1579   // 0 <= TestVal
1580   isl_set *First =
1581       isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1582                             isl_pw_aff_get_domain_space(TestVal))),
1583                         isl_pw_aff_copy(TestVal));
1584
1585   isl_set *Second;
1586   if (IsStrictUpperBound)
1587     // TestVal < UpperBound
1588     Second = isl_pw_aff_lt_set(TestVal, UpperBound);
1589   else
1590     // TestVal <= UpperBound
1591     Second = isl_pw_aff_le_set(TestVal, UpperBound);
1592
1593   isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
1594   ConsequenceCondSet = setDimensionIds(Domain, ConsequenceCondSet);
1595   return ConsequenceCondSet;
1596 }
1597
1598 /// Build the conditions sets for the branch condition @p Condition in
1599 /// the @p Domain.
1600 ///
1601 /// This will fill @p ConditionSets with the conditions under which control
1602 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1603 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1604 /// context under which @p Condition is true/false will be returned as the
1605 /// new elements of @p ConditionSets.
1606 static bool
1607 buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
1608                    TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
1609                    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1610                    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1611   isl_set *ConsequenceCondSet = nullptr;
1612   if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
1613     if (CCond->isZero())
1614       ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
1615     else
1616       ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
1617   } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
1618     auto Opcode = BinOp->getOpcode();
1619     assert(Opcode == Instruction::And || Opcode == Instruction::Or);
1620
1621     bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
1622                                     InvalidDomainMap, ConditionSets) &&
1623                  buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
1624                                     InvalidDomainMap, ConditionSets);
1625     if (!Valid) {
1626       while (!ConditionSets.empty())
1627         isl_set_free(ConditionSets.pop_back_val());
1628       return false;
1629     }
1630
1631     isl_set_free(ConditionSets.pop_back_val());
1632     isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
1633     isl_set_free(ConditionSets.pop_back_val());
1634     isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
1635
1636     if (Opcode == Instruction::And)
1637       ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
1638     else
1639       ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
1640   } else {
1641     auto *ICond = dyn_cast<ICmpInst>(Condition);
1642     assert(ICond &&
1643            "Condition of exiting branch was neither constant nor ICmp!");
1644
1645     ScalarEvolution &SE = *S.getSE();
1646     isl_pw_aff *LHS, *RHS;
1647     // For unsigned comparisons we assumed the signed bit of neither operand
1648     // to be set. The comparison is equal to a signed comparison under this
1649     // assumption.
1650     bool NonNeg = ICond->isUnsigned();
1651     const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
1652                *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
1653
1654     switch (ICond->getPredicate()) {
1655     case ICmpInst::ICMP_ULT:
1656       ConsequenceCondSet =
1657           buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
1658                                      RightOperand, InvalidDomainMap, true);
1659       break;
1660     case ICmpInst::ICMP_ULE:
1661       ConsequenceCondSet =
1662           buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
1663                                      RightOperand, InvalidDomainMap, false);
1664       break;
1665     case ICmpInst::ICMP_UGT:
1666       ConsequenceCondSet =
1667           buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
1668                                      LeftOperand, InvalidDomainMap, true);
1669       break;
1670     case ICmpInst::ICMP_UGE:
1671       ConsequenceCondSet =
1672           buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
1673                                      LeftOperand, InvalidDomainMap, false);
1674       break;
1675     default:
1676       LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
1677       RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
1678       ConsequenceCondSet =
1679           buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
1680       break;
1681     }
1682   }
1683
1684   // If no terminator was given we are only looking for parameter constraints
1685   // under which @p Condition is true/false.
1686   if (!TI)
1687     ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
1688   assert(ConsequenceCondSet);
1689   ConsequenceCondSet = isl_set_coalesce(
1690       isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
1691
1692   isl_set *AlternativeCondSet = nullptr;
1693   bool TooComplex =
1694       isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;
1695
1696   if (!TooComplex) {
1697     AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
1698                                           isl_set_copy(ConsequenceCondSet));
1699     TooComplex =
1700         isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
1701   }
1702
1703   if (TooComplex) {
1704     S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
1705                  TI ? TI->getParent() : nullptr /* BasicBlock */);
1706     isl_set_free(AlternativeCondSet);
1707     isl_set_free(ConsequenceCondSet);
1708     return false;
1709   }
1710
1711   ConditionSets.push_back(ConsequenceCondSet);
1712   ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
1713
1714   return true;
1715 }
1716
1717 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1718 ///
1719 /// This will fill @p ConditionSets with the conditions under which control
1720 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1721 /// have as many elements as @p TI has successors.
1722 static bool
1723 buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
1724                    __isl_keep isl_set *Domain,
1725                    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1726                    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1727   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
1728     return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
1729                               ConditionSets);
1730
1731   assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
1732
1733   if (TI->getNumSuccessors() == 1) {
1734     ConditionSets.push_back(isl_set_copy(Domain));
1735     return true;
1736   }
1737
1738   Value *Condition = getConditionFromTerminator(TI);
1739   assert(Condition && "No condition for Terminator");
1740
1741   return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
1742                             ConditionSets);
1743 }
1744
1745 void ScopStmt::buildDomain() {
1746   isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
1747
1748   Domain = getParent()->getDomainConditions(this);
1749   Domain = Domain.set_tuple_id(Id);
1750 }
1751
1752 void ScopStmt::collectSurroundingLoops() {
1753   for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1754     isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1755     NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
1756   }
1757 }
1758
1759 ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
1760     : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
1761       Build(nullptr), SurroundingLoop(SurroundingLoop) {
1762   BaseName = getIslCompatibleName(
1763       "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
1764 }
1765
1766 ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
1767                    std::vector<Instruction *> Instructions)
1768     : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1769       Build(nullptr), SurroundingLoop(SurroundingLoop),
1770       Instructions(Instructions) {
1771   BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), "",
1772                                   UseInstructionNames);
1773 }
1774
1775 ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
1776                    isl::set NewDomain)
1777     : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
1778       Build(nullptr) {
1779   BaseName = getIslCompatibleName("CopyStmt_", "",
1780                                   std::to_string(parent.getCopyStmtsNum()));
1781   isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
1782   Domain = Domain.set_tuple_id(Id);
1783   TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
1784   auto *Access =
1785       new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
1786   parent.addAccessFunction(Access);
1787   addAccess(Access);
1788   SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
1789   Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
1790   parent.addAccessFunction(Access);
1791   addAccess(Access);
1792 }
1793
1794 ScopStmt::~ScopStmt() = default;
1795
1796 void ScopStmt::init(LoopInfo &LI) {
1797   assert(!Domain && "init must be called only once");
1798
1799   buildDomain();
1800   collectSurroundingLoops();
1801   buildAccessRelations();
1802
1803   if (DetectReductions)
1804     checkForReductions();
1805 }
1806
1807 /// Collect loads which might form a reduction chain with @p StoreMA.
1808 ///
1809 /// Check if the stored value for @p StoreMA is a binary operator with one or
1810 /// two loads as operands. If the binary operand is commutative & associative,
1811 /// used only once (by @p StoreMA) and its load operands are also used only
1812 /// once, we have found a possible reduction chain. It starts at an operand
1813 /// load and includes the binary operator and @p StoreMA.
1814 ///
1815 /// Note: We allow only one use to ensure the load and binary operator cannot
1816 ///       escape this block or into any other store except @p StoreMA.
1817 void ScopStmt::collectCandiateReductionLoads(
1818     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1819   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1820   if (!Store)
1821     return;
1822
1823   // Skip if there is not one binary operator between the load and the store
1824   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1825   if (!BinOp)
1826     return;
1827
1828   // Skip if the binary operators has multiple uses
1829   if (BinOp->getNumUses() != 1)
1830     return;
1831
1832   // Skip if the opcode of the binary operator is not commutative/associative
1833   if (!BinOp->isCommutative() || !BinOp->isAssociative())
1834     return;
1835
1836   // Skip if the binary operator is outside the current SCoP
1837   if (BinOp->getParent() != Store->getParent())
1838     return;
1839
1840   // Skip if it is a multiplicative reduction and we disabled them
1841   if (DisableMultiplicativeReductions &&
1842       (BinOp->getOpcode() == Instruction::Mul ||
1843        BinOp->getOpcode() == Instruction::FMul))
1844     return;
1845
1846   // Check the binary operator operands for a candidate load
1847   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1848   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1849   if (!PossibleLoad0 && !PossibleLoad1)
1850     return;
1851
1852   // A load is only a candidate if it cannot escape (thus has only this use)
1853   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1854     if (PossibleLoad0->getParent() == Store->getParent())
1855       Loads.push_back(&getArrayAccessFor(PossibleLoad0));
1856   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1857     if (PossibleLoad1->getParent() == Store->getParent())
1858       Loads.push_back(&getArrayAccessFor(PossibleLoad1));
1859 }
1860
1861 /// Check for reductions in this ScopStmt.
1862 ///
1863 /// Iterate over all store memory accesses and check for valid binary reduction
1864 /// like chains. For all candidates we check if they have the same base address
1865 /// and there are no other accesses which overlap with them. The base address
1866 /// check rules out impossible reductions candidates early. The overlap check,
1867 /// together with the "only one user" check in collectCandiateReductionLoads,
1868 /// guarantees that none of the intermediate results will escape during
1869 /// execution of the loop nest. We basically check here that no other memory
1870 /// access can access the same memory as the potential reduction.
1871 void ScopStmt::checkForReductions() {
1872   SmallVector<MemoryAccess *, 2> Loads;
1873   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1874
1875   // First collect candidate load-store reduction chains by iterating over all
1876   // stores and collecting possible reduction loads.
1877   for (MemoryAccess *StoreMA : MemAccs) {
1878     if (StoreMA->isRead())
1879       continue;
1880
1881     Loads.clear();
1882     collectCandiateReductionLoads(StoreMA, Loads);
1883     for (MemoryAccess *LoadMA : Loads)
1884       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1885   }
1886
1887   // Then check each possible candidate pair.
1888   for (const auto &CandidatePair : Candidates) {
1889     bool Valid = true;
1890     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1891     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1892
1893     // Skip those with obviously unequal base addresses.
1894     if (!LoadAccs.has_equal_space(StoreAccs)) {
1895       continue;
1896     }
1897
1898     // And check if the remaining for overlap with other memory accesses.
1899     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1900     AllAccsRel = AllAccsRel.intersect_domain(getDomain());
1901     isl::set AllAccs = AllAccsRel.range();
1902
1903     for (MemoryAccess *MA : MemAccs) {
1904       if (MA == CandidatePair.first || MA == CandidatePair.second)
1905         continue;
1906
1907       isl::map AccRel = MA->getAccessRelation().intersect_domain(getDomain());
1908       isl::set Accs = AccRel.range();
1909
1910       if (AllAccs.has_equal_space(Accs)) {
1911         isl::set OverlapAccs = Accs.intersect(AllAccs);
1912         Valid = Valid && OverlapAccs.is_empty();
1913       }
1914     }
1915
1916     if (!Valid)
1917       continue;
1918
1919     const LoadInst *Load =
1920         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1921     MemoryAccess::ReductionType RT =
1922         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1923
1924     // If no overlapping access was found we mark the load and store as
1925     // reduction like.
1926     CandidatePair.first->markAsReductionLike(RT);
1927     CandidatePair.second->markAsReductionLike(RT);
1928   }
1929 }
1930
1931 std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }
1932
1933 std::string ScopStmt::getScheduleStr() const {
1934   auto *S = getSchedule().release();
1935   if (!S)
1936     return {};
1937   auto Str = stringFromIslObj(S);
1938   isl_map_free(S);
1939   return Str;
1940 }
1941
1942 void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }
1943
1944 BasicBlock *ScopStmt::getEntryBlock() const {
1945   if (isBlockStmt())
1946     return getBasicBlock();
1947   return getRegion()->getEntry();
1948 }
1949
1950 unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }
1951
1952 const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }
1953
1954 Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
1955   return NestLoops[Dimension];
1956 }
1957
1958 isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
1959
1960 isl::set ScopStmt::getDomain() const { return Domain; }
1961
1962 isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }
1963
1964 isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
1965
1966 void ScopStmt::printInstructions(raw_ostream &OS) const {
1967   OS << "Instructions {\n";
1968
1969   for (Instruction *Inst : Instructions)
1970     OS.indent(16) << *Inst << "\n";
1971
1972   OS.indent(12) << "}\n";
1973 }
1974
1975 void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
1976   OS << "\t" << getBaseName() << "\n";
1977   OS.indent(12) << "Domain :=\n";
1978
1979   if (Domain) {
1980     OS.indent(16) << getDomainStr() << ";\n";
1981   } else
1982     OS.indent(16) << "n/a\n";
1983
1984   OS.indent(12) << "Schedule :=\n";
1985
1986   if (Domain) {
1987     OS.indent(16) << getScheduleStr() << ";\n";
1988   } else
1989     OS.indent(16) << "n/a\n";
1990
1991   for (MemoryAccess *Access : MemAccs)
1992     Access->print(OS);
1993
1994   if (PrintInstructions && isBlockStmt())
1995     printInstructions(OS.indent(12));
1996 }
1997
1998 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1999 LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
2000 #endif
2001
2002 void ScopStmt::removeAccessData(MemoryAccess *MA) {
2003   if (MA->isRead() && MA->isOriginalValueKind()) {
2004     bool Found = ValueReads.erase(MA->getAccessValue());
2005     (void)Found;
2006     assert(Found && "Expected access data not found");
2007   }
2008   if (MA->isWrite() && MA->isOriginalValueKind()) {
2009     bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
2010     (void)Found;
2011     assert(Found && "Expected access data not found");
2012   }
2013   if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
2014     bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
2015     (void)Found;
2016     assert(Found && "Expected access data not found");
2017   }
2018   if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
2019     bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
2020     (void)Found;
2021     assert(Found && "Expected access data not found");
2022   }
2023 }
2024
2025 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
2026   // Remove the memory accesses from this statement together with all scalar
2027   // accesses that were caused by it. MemoryKind::Value READs have no access
2028   // instruction, hence would not be removed by this function. However, it is
2029   // only used for invariant LoadInst accesses, its arguments are always affine,
2030   // hence synthesizable, and therefore there are no MemoryKind::Value READ
2031   // accesses to be removed.
2032   auto Predicate = [&](MemoryAccess *Acc) {
2033     return Acc->getAccessInstruction() == MA->getAccessInstruction();
2034   };
2035   for (auto *MA : MemAccs) {
2036     if (Predicate(MA)) {
2037       removeAccessData(MA);
2038       Parent.removeAccessData(MA);
2039     }
2040   }
2041   MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
2042                 MemAccs.end());
2043   InstructionToAccess.erase(MA->getAccessInstruction());
2044 }
2045
2046 void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
2047   auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
2048   assert(MAIt != MemAccs.end());
2049   MemAccs.erase(MAIt);
2050
2051   removeAccessData(MA);
2052   Parent.removeAccessData(MA);
2053
2054   auto It = InstructionToAccess.find(MA->getAccessInstruction());
2055   if (It != InstructionToAccess.end()) {
2056     It->second.remove(MA);
2057     if (It->second.empty())
2058       InstructionToAccess.erase(MA->getAccessInstruction());
2059   }
2060 }
2061
2062 MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
2063   MemoryAccess *Access = lookupInputAccessOf(V);
2064   if (Access)
2065     return Access;
2066
2067   ScopArrayInfo *SAI =
2068       Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
2069   Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
2070                             true, {}, {}, V, MemoryKind::Value);
2071   Parent.addAccessFunction(Access);
2072   Access->buildAccessRelation(SAI);
2073   addAccess(Access);
2074   Parent.addAccessData(Access);
2075   return Access;
2076 }
2077
2078 raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
2079   S.print(OS, PollyPrintInstructions);
2080   return OS;
2081 }
2082
2083 //===----------------------------------------------------------------------===//
2084 /// Scop class implement
2085
2086 void Scop::setContext(__isl_take isl_set *NewContext) {
2087   NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
2088   isl_set_free(Context);
2089   Context = NewContext;
2090 }
2091
2092 namespace {
2093
2094 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
2095 struct SCEVSensitiveParameterRewriter
2096     : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
2097   const ValueToValueMap &VMap;
2098
2099 public:
2100   SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
2101                                  ScalarEvolution &SE)
2102       : SCEVRewriteVisitor(SE), VMap(VMap) {}
2103
2104   static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
2105                              const ValueToValueMap &VMap) {
2106     SCEVSensitiveParameterRewriter SSPR(VMap, SE);
2107     return SSPR.visit(E);
2108   }
2109
2110   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
2111     auto *Start = visit(E->getStart());
2112     auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
2113                                     visit(E->getStepRecurrence(SE)),
2114                                     E->getLoop(), SCEV::FlagAnyWrap);
2115     return SE.getAddExpr(Start, AddRec);
2116   }
2117
2118   const SCEV *visitUnknown(const SCEVUnknown *E) {
2119     if (auto *NewValue = VMap.lookup(E->getValue()))
2120       return SE.getUnknown(NewValue);
2121     return E;
2122   }
2123 };
2124
2125 /// Check whether we should remap a SCEV expression.
2126 struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
2127   const ValueToValueMap &VMap;
2128   bool FoundInside = false;
2129   const Scop *S;
2130
2131 public:
2132   SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
2133                      const Scop *S)
2134       : SCEVTraversal(*this), VMap(VMap), S(S) {}
2135
2136   static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
2137                          const ValueToValueMap &VMap, const Scop *S) {
2138     SCEVFindInsideScop SFIS(VMap, SE, S);
2139     SFIS.visitAll(E);
2140     return SFIS.FoundInside;
2141   }
2142
2143   bool follow(const SCEV *E) {
2144     if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
2145       FoundInside |= S->getRegion().contains(AddRec->getLoop());
2146     } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
2147       if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
2148         FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
2149     }
2150     return !FoundInside;
2151   }
2152
2153   bool isDone() { return FoundInside; }
2154 };
2155
2156 } // end anonymous namespace
2157
2158 const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
2159   // Check whether it makes sense to rewrite the SCEV.  (ScalarEvolution
2160   // doesn't like addition between an AddRec and an expression that
2161   // doesn't have a dominance relationship with it.)
2162   if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
2163     return E;
2164
2165   // Rewrite SCEV.
2166   return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
2167 }
2168
2169 // This table of function names is used to translate parameter names in more
2170 // human-readable names. This makes it easier to interpret Polly analysis
2171 // results.
2172 StringMap<std::string> KnownNames = {
2173     {"_Z13get_global_idj", "global_id"},
2174     {"_Z12get_local_idj", "local_id"},
2175     {"_Z15get_global_sizej", "global_size"},
2176     {"_Z14get_local_sizej", "local_size"},
2177     {"_Z12get_work_dimv", "work_dim"},
2178     {"_Z17get_global_offsetj", "global_offset"},
2179     {"_Z12get_group_idj", "group_id"},
2180     {"_Z14get_num_groupsj", "num_groups"},
2181 };
2182
2183 static std::string getCallParamName(CallInst *Call) {
2184   std::string Result;
2185   raw_string_ostream OS(Result);
2186   std::string Name = Call->getCalledFunction()->getName();
2187
2188   auto Iterator = KnownNames.find(Name);
2189   if (Iterator != KnownNames.end())
2190     Name = "__" + Iterator->getValue();
2191   OS << Name;
2192   for (auto &Operand : Call->arg_operands()) {
2193     ConstantInt *Op = cast<ConstantInt>(&Operand);
2194     OS << "_" << Op->getValue();
2195   }
2196   OS.flush();
2197   return Result;
2198 }
2199
2200 void Scop::createParameterId(const SCEV *Parameter) {
2201   assert(Parameters.count(Parameter));
2202   assert(!ParameterIds.count(Parameter));
2203
2204   std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);
2205
2206   if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
2207     Value *Val = ValueParameter->getValue();
2208     CallInst *Call = dyn_cast<CallInst>(Val);
2209
2210     if (Call && isConstCall(Call)) {
2211       ParameterName = getCallParamName(Call);
2212     } else if (UseInstructionNames) {
2213       // If this parameter references a specific Value and this value has a name
2214       // we use this name as it is likely to be unique and more useful than just
2215       // a number.
2216       if (Val->hasName())
2217         ParameterName = Val->getName();
2218       else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
2219         auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
2220         if (LoadOrigin->hasName()) {
2221           ParameterName += "_loaded_from_";
2222           ParameterName +=
2223               LI->getPointerOperand()->stripInBoundsOffsets()->getName();
2224         }
2225       }
2226     }
2227
2228     ParameterName = getIslCompatibleName("", ParameterName, "");
2229   }
2230
2231   isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
2232                               const_cast<void *>((const void *)Parameter));
2233   ParameterIds[Parameter] = Id;
2234 }
2235
2236 void Scop::addParams(const ParameterSetTy &NewParameters) {
2237   for (const SCEV *Parameter : NewParameters) {
2238     // Normalize the SCEV to get the representing element for an invariant load.
2239     Parameter = extractConstantFactor(Parameter, *SE).second;
2240     Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2241
2242     if (Parameters.insert(Parameter))
2243       createParameterId(Parameter);
2244   }
2245 }
2246
2247 isl::id Scop::getIdForParam(const SCEV *Parameter) const {
2248   // Normalize the SCEV to get the representing element for an invariant load.
2249   Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2250   return ParameterIds.lookup(Parameter);
2251 }
2252
2253 isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
2254   isl_set *DomainContext = isl_union_set_params(getDomains().release());
2255   return isl::manage(isl_set_intersect_params(C.release(), DomainContext));
2256 }
2257
2258 bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
2259   return DT.dominates(BB, getEntry());
2260 }
2261
2262 void Scop::addUserAssumptions(
2263     AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
2264     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2265   for (auto &Assumption : AC.assumptions()) {
2266     auto *CI = dyn_cast_or_null<CallInst>(Assumption);
2267     if (!CI || CI->getNumArgOperands() != 1)
2268       continue;
2269
2270     bool InScop = contains(CI);
2271     if (!InScop && !isDominatedBy(DT, CI->getParent()))
2272       continue;
2273
2274     auto *L = LI.getLoopFor(CI->getParent());
2275     auto *Val = CI->getArgOperand(0);
2276     ParameterSetTy DetectedParams;
2277     if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
2278       ORE.emit(
2279           OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
2280           << "Non-affine user assumption ignored.");
2281       continue;
2282     }
2283
2284     // Collect all newly introduced parameters.
2285     ParameterSetTy NewParams;
2286     for (auto *Param : DetectedParams) {
2287       Param = extractConstantFactor(Param, *SE).second;
2288       Param = getRepresentingInvariantLoadSCEV(Param);
2289       if (Parameters.count(Param))
2290         continue;
2291       NewParams.insert(Param);
2292     }
2293
2294     SmallVector<isl_set *, 2> ConditionSets;
2295     auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
2296     BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
2297     auto *Dom = InScop ? DomainMap[BB].copy() : isl_set_copy(Context);
2298     assert(Dom && "Cannot propagate a nullptr.");
2299     bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
2300                                     InvalidDomainMap, ConditionSets);
2301     isl_set_free(Dom);
2302
2303     if (!Valid)
2304       continue;
2305
2306     isl_set *AssumptionCtx = nullptr;
2307     if (InScop) {
2308       AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
2309       isl_set_free(ConditionSets[0]);
2310     } else {
2311       AssumptionCtx = isl_set_complement(ConditionSets[1]);
2312       AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
2313     }
2314
2315     // Project out newly introduced parameters as they are not otherwise useful.
2316     if (!NewParams.empty()) {
2317       for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
2318         auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
2319         auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
2320         isl_id_free(Id);
2321
2322         if (!NewParams.count(Param))
2323           continue;
2324
2325         AssumptionCtx =
2326             isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
2327       }
2328     }
2329     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
2330              << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
2331     Context = isl_set_intersect(Context, AssumptionCtx);
2332   }
2333 }
2334
2335 void Scop::addUserContext() {
2336   if (UserContextStr.empty())
2337     return;
2338
2339   isl_set *UserContext =
2340       isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
2341   isl_space *Space = getParamSpace().release();
2342   if (isl_space_dim(Space, isl_dim_param) !=
2343       isl_set_dim(UserContext, isl_dim_param)) {
2344     auto SpaceStr = isl_space_to_str(Space);
2345     errs() << "Error: the context provided in -polly-context has not the same "
2346            << "number of dimensions than the computed context. Due to this "
2347            << "mismatch, the -polly-context option is ignored. Please provide "
2348            << "the context in the parameter space: " << SpaceStr << ".\n";
2349     free(SpaceStr);
2350     isl_set_free(UserContext);
2351     isl_space_free(Space);
2352     return;
2353   }
2354
2355   for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
2356     auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
2357     auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);
2358
2359     if (strcmp(NameContext, NameUserContext) != 0) {
2360       auto SpaceStr = isl_space_to_str(Space);
2361       errs() << "Error: the name of dimension " << i
2362              << " provided in -polly-context "
2363              << "is '" << NameUserContext << "', but the name in the computed "
2364              << "context is '" << NameContext
2365              << "'. Due to this name mismatch, "
2366              << "the -polly-context option is ignored. Please provide "
2367              << "the context in the parameter space: " << SpaceStr << ".\n";
2368       free(SpaceStr);
2369       isl_set_free(UserContext);
2370       isl_space_free(Space);
2371       return;
2372     }
2373
2374     UserContext =
2375         isl_set_set_dim_id(UserContext, isl_dim_param, i,
2376                            isl_space_get_dim_id(Space, isl_dim_param, i));
2377   }
2378
2379   Context = isl_set_intersect(Context, UserContext);
2380   isl_space_free(Space);
2381 }
2382
2383 void Scop::buildInvariantEquivalenceClasses() {
2384   DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
2385
2386   const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
2387   for (LoadInst *LInst : RIL) {
2388     const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
2389
2390     Type *Ty = LInst->getType();
2391     LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
2392     if (ClassRep) {
2393       InvEquivClassVMap[LInst] = ClassRep;
2394       continue;
2395     }
2396
2397     ClassRep = LInst;
2398     InvariantEquivClasses.emplace_back(
2399         InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
2400   }
2401 }
2402
2403 void Scop::buildContext() {
2404   isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
2405   Context = isl_set_universe(isl_space_copy(Space));
2406   InvalidContext = isl_set_empty(isl_space_copy(Space));
2407   AssumedContext = isl_set_universe(Space);
2408 }
2409
2410 void Scop::addParameterBounds() {
2411   unsigned PDim = 0;
2412   for (auto *Parameter : Parameters) {
2413     ConstantRange SRange = SE->getSignedRange(Parameter);
2414     Context =
2415         addRangeBoundsToSet(give(Context), SRange, PDim++, isl::dim::param)
2416             .release();
2417   }
2418 }
2419
2420 static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
2421   std::vector<isl::id> OutermostSizeIds;
2422   for (auto Array : Arrays) {
2423     // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2424     // for its outermost dimension. Fortran arrays will have this since the
2425     // outermost dimension size can be picked up from their runtime description.
2426     // TODO: actually need to check if it has a FAD, but for now this works.
2427     if (Array->getNumberOfDimensions() > 0) {
2428       isl::pw_aff PwAff = Array->getDimensionSizePw(0);
2429       if (!PwAff)
2430         continue;
2431
2432       isl::id Id =
2433           isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0));
2434       assert(!Id.is_null() &&
2435              "Invalid Id for PwAff expression in Fortran array");
2436       Id.dump();
2437       OutermostSizeIds.push_back(Id);
2438     }
2439   }
2440   return OutermostSizeIds;
2441 }
2442
2443 // The FORTRAN array size parameters are known to be non-negative.
2444 static isl_set *boundFortranArrayParams(__isl_give isl_set *Context,
2445                                         Scop::array_range Arrays) {
2446   std::vector<isl::id> OutermostSizeIds;
2447   OutermostSizeIds = getFortranArrayIds(Arrays);
2448
2449   for (isl::id Id : OutermostSizeIds) {
2450     int dim = isl_set_find_dim_by_id(Context, isl_dim_param, Id.get());
2451     Context = isl_set_lower_bound_si(Context, isl_dim_param, dim, 0);
2452   }
2453
2454   return Context;
2455 }
2456
2457 void Scop::realignParams() {
2458   if (PollyIgnoreParamBounds)
2459     return;
2460
2461   // Add all parameters into a common model.
2462   isl::space Space = getFullParamSpace();
2463
2464   // Align the parameters of all data structures to the model.
2465   Context = isl_set_align_params(Context, Space.copy());
2466
2467   // Bound the size of the fortran array dimensions.
2468   Context = boundFortranArrayParams(Context, arrays());
2469
2470   // As all parameters are known add bounds to them.
2471   addParameterBounds();
2472
2473   for (ScopStmt &Stmt : *this)
2474     Stmt.realignParams();
2475   // Simplify the schedule according to the context too.
2476   Schedule = isl_schedule_gist_domain_params(Schedule, getContext().release());
2477 }
2478
2479 static __isl_give isl_set *
2480 simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
2481                           const Scop &S) {
2482   // If we have modeled all blocks in the SCoP that have side effects we can
2483   // simplify the context with the constraints that are needed for anything to
2484   // be executed at all. However, if we have error blocks in the SCoP we already
2485   // assumed some parameter combinations cannot occur and removed them from the
2486   // domains, thus we cannot use the remaining domain to simplify the
2487   // assumptions.
2488   if (!S.hasErrorBlock()) {
2489     isl_set *DomainParameters = isl_union_set_params(S.getDomains().release());
2490     AssumptionContext =
2491         isl_set_gist_params(AssumptionContext, DomainParameters);
2492   }
2493
2494   AssumptionContext =
2495       isl_set_gist_params(AssumptionContext, S.getContext().release());
2496   return AssumptionContext;
2497 }
2498
2499 void Scop::simplifyContexts() {
2500   // The parameter constraints of the iteration domains give us a set of
2501   // constraints that need to hold for all cases where at least a single
2502   // statement iteration is executed in the whole scop. We now simplify the
2503   // assumed context under the assumption that such constraints hold and at
2504   // least a single statement iteration is executed. For cases where no
2505   // statement instances are executed, the assumptions we have taken about
2506   // the executed code do not matter and can be changed.
2507   //
2508   // WARNING: This only holds if the assumptions we have taken do not reduce
2509   //          the set of statement instances that are executed. Otherwise we
2510   //          may run into a case where the iteration domains suggest that
2511   //          for a certain set of parameter constraints no code is executed,
2512   //          but in the original program some computation would have been
2513   //          performed. In such a case, modifying the run-time conditions and
2514   //          possibly influencing the run-time check may cause certain scops
2515   //          to not be executed.
2516   //
2517   // Example:
2518   //
2519   //   When delinearizing the following code:
2520   //
2521   //     for (long i = 0; i < 100; i++)
2522   //       for (long j = 0; j < m; j++)
2523   //         A[i+p][j] = 1.0;
2524   //
2525   //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2526   //   otherwise we would access out of bound data. Now, knowing that code is
2527   //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
2528   AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
2529   InvalidContext =
2530       isl_set_align_params(InvalidContext, getParamSpace().release());
2531 }
2532
2533 /// Add the minimal/maximal access in @p Set to @p User.
2534 static isl::stat
2535 buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
2536   isl::pw_multi_aff MinPMA, MaxPMA;
2537   isl::pw_aff LastDimAff;
2538   isl::aff OneAff;
2539   unsigned Pos;
2540   isl::ctx Ctx = Set.get_ctx();
2541
2542   Set = Set.remove_divs();
2543
2544   if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
2545     return isl::stat::error;
2546
2547   // Restrict the number of parameters involved in the access as the lexmin/
2548   // lexmax computation will take too long if this number is high.
2549   //
2550   // Experiments with a simple test case using an i7 4800MQ:
2551   //
2552   //  #Parameters involved | Time (in sec)
2553   //            6          |     0.01
2554   //            7          |     0.04
2555   //            8          |     0.12
2556   //            9          |     0.40
2557   //           10          |     1.54
2558   //           11          |     6.78
2559   //           12          |    30.38
2560   //
2561   if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
2562     unsigned InvolvedParams = 0;
2563     for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
2564       if (Set.involves_dims(isl::dim::param, u, 1))
2565         InvolvedParams++;
2566
2567     if (InvolvedParams > RunTimeChecksMaxParameters)
2568       return isl::stat::error;
2569   }
2570
2571   if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
2572     return isl::stat::error;
2573
2574   MinPMA = Set.lexmin_pw_multi_aff();
2575   MaxPMA = Set.lexmax_pw_multi_aff();
2576
2577   if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
2578     return isl::stat::error;
2579
2580   MinPMA = MinPMA.coalesce();
2581   MaxPMA = MaxPMA.coalesce();
2582
2583   // Adjust the last dimension of the maximal access by one as we want to
2584   // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2585   // we test during code generation might now point after the end of the
2586   // allocated array but we will never dereference it anyway.
2587   assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
2588   Pos = MaxPMA.dim(isl::dim::out) - 1;
2589   LastDimAff = MaxPMA.get_pw_aff(Pos);
2590   OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
2591   OneAff = OneAff.add_constant_si(1);
2592   LastDimAff = LastDimAff.add(OneAff);
2593   MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
2594
2595   MinMaxAccesses.push_back(std::make_pair(MinPMA.copy(), MaxPMA.copy()));
2596
2597   return isl::stat::ok;
2598 }
2599
2600 static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
2601   isl_set *Domain = MA->getStatement()->getDomain().release();
2602   Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
2603   return isl_set_reset_tuple_id(Domain);
2604 }
2605
2606 /// Wrapper function to calculate minimal/maximal accesses to each array.
2607 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
2608                                   Scop::MinMaxVectorTy &MinMaxAccesses) {
2609   MinMaxAccesses.reserve(AliasGroup.size());
2610
2611   isl::union_set Domains = S.getDomains();
2612   isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());
2613
2614   for (MemoryAccess *MA : AliasGroup)
2615     Accesses = Accesses.add_map(give(MA->getAccessRelation().release()));
2616
2617   Accesses = Accesses.intersect_domain(Domains);
2618   isl::union_set Locations = Accesses.range();
2619   Locations = Locations.coalesce();
2620   Locations = Locations.detect_equalities();
2621
2622   auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
2623     return buildMinMaxAccess(Set, MinMaxAccesses, S);
2624   };
2625   return Locations.foreach_set(Lambda) == isl::stat::ok;
2626 }
2627
2628 /// Helper to treat non-affine regions and basic blocks the same.
2629 ///
2630 ///{
2631
2632 /// Return the block that is the representing block for @p RN.
2633 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2634   return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2635                            : RN->getNodeAs<BasicBlock>();
2636 }
2637
2638 /// Return the @p idx'th block that is executed after @p RN.
2639 static inline BasicBlock *
2640 getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2641   if (RN->isSubRegion()) {
2642     assert(idx == 0);
2643     return RN->getNodeAs<Region>()->getExit();
2644   }
2645   return TI->getSuccessor(idx);
2646 }
2647
2648 /// Return the smallest loop surrounding @p RN.
2649 static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
2650   if (!RN->isSubRegion()) {
2651     BasicBlock *BB = RN->getNodeAs<BasicBlock>();
2652     Loop *L = LI.getLoopFor(BB);
2653
2654     // Unreachable statements are not considered to belong to a LLVM loop, as
2655     // they are not part of an actual loop in the control flow graph.
2656     // Nevertheless, we handle certain unreachable statements that are common
2657     // when modeling run-time bounds checks as being part of the loop to be
2658     // able to model them and to later eliminate the run-time bounds checks.
2659     //
2660     // Specifically, for basic blocks that terminate in an unreachable and
2661     // where the immediate predecessor is part of a loop, we assume these
2662     // basic blocks belong to the loop the predecessor belongs to. This
2663     // allows us to model the following code.
2664     //
2665     // for (i = 0; i < N; i++) {
2666     //   if (i > 1024)
2667     //     abort();            <- this abort might be translated to an
2668     //                            unreachable
2669     //
2670     //   A[i] = ...
2671     // }
2672     if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
2673       L = LI.getLoopFor(BB->getPrevNode());
2674     return L;
2675   }
2676
2677   Region *NonAffineSubRegion = RN->getNodeAs<Region>();
2678   Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
2679   while (L && NonAffineSubRegion->contains(L))
2680     L = L->getParentLoop();
2681   return L;
2682 }
2683
2684 /// Get the number of blocks in @p L.
2685 ///
2686 /// The number of blocks in a loop are the number of basic blocks actually
2687 /// belonging to the loop, as well as all single basic blocks that the loop
2688 /// exits to and which terminate in an unreachable instruction. We do not
2689 /// allow such basic blocks in the exit of a scop, hence they belong to the
2690 /// scop and represent run-time conditions which we want to model and
2691 /// subsequently speculate away.
2692 ///
2693 /// @see getRegionNodeLoop for additional details.
2694 unsigned getNumBlocksInLoop(Loop *L) {
2695   unsigned NumBlocks = L->getNumBlocks();
2696   SmallVector<BasicBlock *, 4> ExitBlocks;
2697   L->getExitBlocks(ExitBlocks);
2698
2699   for (auto ExitBlock : ExitBlocks) {
2700     if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2701       NumBlocks++;
2702   }
2703   return NumBlocks;
2704 }
2705
2706 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2707   if (!RN->isSubRegion())
2708     return 1;
2709
2710   Region *R = RN->getNodeAs<Region>();
2711   return std::distance(R->block_begin(), R->block_end());
2712 }
2713
2714 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2715                                const DominatorTree &DT) {
2716   if (!RN->isSubRegion())
2717     return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2718   for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2719     if (isErrorBlock(*BB, R, LI, DT))
2720       return true;
2721   return false;
2722 }
2723
2724 ///}
2725
2726 static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
2727                                                  unsigned Dim, Loop *L) {
2728   Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
2729   isl_id *DimId =
2730       isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
2731   return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
2732 }
2733
2734 isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
2735   return getDomainConditions(Stmt->getEntryBlock());
2736 }
2737
2738 isl::set Scop::getDomainConditions(BasicBlock *BB) const {
2739   auto DIt = DomainMap.find(BB);
2740   if (DIt != DomainMap.end())
2741     return DIt->getSecond();
2742
2743   auto &RI = *R.getRegionInfo();
2744   auto *BBR = RI.getRegionFor(BB);
2745   while (BBR->getEntry() == BB)
2746     BBR = BBR->getParent();
2747   return getDomainConditions(BBR->getEntry());
2748 }
2749
2750 bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
2751                         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2752   bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
2753   auto *EntryBB = R->getEntry();
2754   auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
2755   int LD = getRelativeLoopDepth(L);
2756   auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));
2757
2758   while (LD-- >= 0) {
2759     S = addDomainDimId(S, LD + 1, L);
2760     L = L->getParentLoop();
2761   }
2762
2763   InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
2764   DomainMap[EntryBB] = isl::manage(S);
2765
2766   if (IsOnlyNonAffineRegion)
2767     return !containsErrorBlock(R->getNode(), *R, LI, DT);
2768
2769   if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
2770     return false;
2771
2772   if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
2773     return false;
2774
2775   // Error blocks and blocks dominated by them have been assumed to never be
2776   // executed. Representing them in the Scop does not add any value. In fact,
2777   // it is likely to cause issues during construction of the ScopStmts. The
2778   // contents of error blocks have not been verified to be expressible and
2779   // will cause problems when building up a ScopStmt for them.
2780   // Furthermore, basic blocks dominated by error blocks may reference
2781   // instructions in the error block which, if the error block is not modeled,
2782   // can themselves not be constructed properly. To this end we will replace
2783   // the domains of error blocks and those only reachable via error blocks
2784   // with an empty set. Additionally, we will record for each block under which
2785   // parameter combination it would be reached via an error block in its
2786   // InvalidDomain. This information is needed during load hoisting.
2787   if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
2788     return false;
2789
2790   return true;
2791 }
2792
2793 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2794 ///        to be compatible to domains constructed for loop @p NewL.
2795 ///
2796 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2797 /// edge from @p OldL to @p NewL.
2798 static __isl_give isl_set *adjustDomainDimensions(Scop &S,
2799                                                   __isl_take isl_set *Dom,
2800                                                   Loop *OldL, Loop *NewL) {
2801   // If the loops are the same there is nothing to do.
2802   if (NewL == OldL)
2803     return Dom;
2804
2805   int OldDepth = S.getRelativeLoopDepth(OldL);
2806   int NewDepth = S.getRelativeLoopDepth(NewL);
2807   // If both loops are non-affine loops there is nothing to do.
2808   if (OldDepth == -1 && NewDepth == -1)
2809     return Dom;
2810
2811   // Distinguish three cases:
2812   //   1) The depth is the same but the loops are not.
2813   //      => One loop was left one was entered.
2814   //   2) The depth increased from OldL to NewL.
2815   //      => One loop was entered, none was left.
2816   //   3) The depth decreased from OldL to NewL.
2817   //      => Loops were left were difference of the depths defines how many.
2818   if (OldDepth == NewDepth) {
2819     assert(OldL->getParentLoop() == NewL->getParentLoop());
2820     Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
2821     Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2822     Dom = addDomainDimId(Dom, NewDepth, NewL);
2823   } else if (OldDepth < NewDepth) {
2824     assert(OldDepth + 1 == NewDepth);
2825     auto &R = S.getRegion();
2826     (void)R;
2827     assert(NewL->getParentLoop() == OldL ||
2828            ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
2829     Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2830     Dom = addDomainDimId(Dom, NewDepth, NewL);
2831   } else {
2832     assert(OldDepth > NewDepth);
2833     int Diff = OldDepth - NewDepth;
2834     int NumDim = isl_set_n_dim(Dom);
2835     assert(NumDim >= Diff);
2836     Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
2837   }
2838
2839   return Dom;
2840 }
2841
2842 bool Scop::propagateInvalidStmtDomains(
2843     Region *R, DominatorTree &DT, LoopInfo &LI,
2844     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2845   ReversePostOrderTraversal<Region *> RTraversal(R);
2846   for (auto *RN : RTraversal) {
2847
2848     // Recurse for affine subregions but go on for basic blocks and non-affine
2849     // subregions.
2850     if (RN->isSubRegion()) {
2851       Region *SubRegion = RN->getNodeAs<Region>();
2852       if (!isNonAffineSubRegion(SubRegion)) {
2853         propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
2854         continue;
2855       }
2856     }
2857
2858     bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
2859     BasicBlock *BB = getRegionNodeBasicBlock(RN);
2860     isl::set &Domain = DomainMap[BB];
2861     assert(Domain && "Cannot propagate a nullptr");
2862
2863     isl::set InvalidDomain = InvalidDomainMap[BB];
2864
2865     bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
2866
2867     if (!IsInvalidBlock) {
2868       InvalidDomain = InvalidDomain.intersect(Domain);
2869     } else {
2870       InvalidDomain = Domain;
2871       isl::set DomPar = Domain.params();
2872       recordAssumption(ERRORBLOCK, DomPar.release(),
2873                        BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
2874       Domain = nullptr;
2875     }
2876
2877     if (InvalidDomain.is_empty()) {
2878       InvalidDomainMap[BB] = InvalidDomain;
2879       continue;
2880     }
2881
2882     auto *BBLoop = getRegionNodeLoop(RN, LI);
2883     auto *TI = BB->getTerminator();
2884     unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
2885     for (unsigned u = 0; u < NumSuccs; u++) {
2886       auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2887
2888       // Skip successors outside the SCoP.
2889       if (!contains(SuccBB))
2890         continue;
2891
2892       // Skip backedges.
2893       if (DT.dominates(SuccBB, BB))
2894         continue;
2895
2896       Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2897
2898       auto *AdjustedInvalidDomain = adjustDomainDimensions(
2899           *this, InvalidDomain.copy(), BBLoop, SuccBBLoop);
2900
2901       auto *SuccInvalidDomain = InvalidDomainMap[SuccBB].copy();
2902       SuccInvalidDomain =
2903           isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
2904       SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
2905       unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);
2906
2907       InvalidDomainMap[SuccBB] = isl::manage(SuccInvalidDomain);
2908
2909       // Check if the maximal number of domain disjunctions was reached.
2910       // In case this happens we will bail.
2911       if (NumConjucts < MaxDisjunctsInDomain)
2912         continue;
2913
2914       InvalidDomainMap.erase(BB);
2915       invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
2916       return false;
2917     }
2918
2919     InvalidDomainMap[BB] = InvalidDomain;
2920   }
2921
2922   return true;
2923 }
2924
2925 void Scop::propagateDomainConstraintsToRegionExit(
2926     BasicBlock *BB, Loop *BBLoop,
2927     SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
2928     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2929   // Check if the block @p BB is the entry of a region. If so we propagate it's
2930   // domain to the exit block of the region. Otherwise we are done.
2931   auto *RI = R.getRegionInfo();
2932   auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
2933   auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
2934   if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
2935     return;
2936
2937   // Do not propagate the domain if there is a loop backedge inside the region
2938   // that would prevent the exit block from being executed.
2939   auto *L = BBLoop;
2940   while (L && contains(L)) {
2941     SmallVector<BasicBlock *, 4> LatchBBs;
2942     BBLoop->getLoopLatches(LatchBBs);
2943     for (auto *LatchBB : LatchBBs)
2944       if (BB != LatchBB && BBReg->contains(LatchBB))
2945         return;
2946     L = L->getParentLoop();
2947   }
2948
2949   isl::set Domain = DomainMap[BB];
2950   assert(Domain && "Cannot propagate a nullptr");
2951
2952   Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());
2953
2954   // Since the dimensions of @p BB and @p ExitBB might be different we have to
2955   // adjust the domain before we can propagate it.
2956   isl::set AdjustedDomain = isl::manage(
2957       adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop));
2958   isl::set &ExitDomain = DomainMap[ExitBB];
2959
2960   // If the exit domain is not yet created we set it otherwise we "add" the
2961   // current domain.
2962   ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
2963
2964   // Initialize the invalid domain.
2965   InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
2966
2967   FinishedExitBlocks.insert(ExitBB);
2968 }
2969
2970 bool Scop::buildDomainsWithBranchConstraints(
2971     Region *R, DominatorTree &DT, LoopInfo &LI,
2972     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2973   // To create the domain for each block in R we iterate over all blocks and
2974   // subregions in R and propagate the conditions under which the current region
2975   // element is executed. To this end we iterate in reverse post order over R as
2976   // it ensures that we first visit all predecessors of a region node (either a
2977   // basic block or a subregion) before we visit the region node itself.
2978   // Initially, only the domain for the SCoP region entry block is set and from
2979   // there we propagate the current domain to all successors, however we add the
2980   // condition that the successor is actually executed next.
2981   // As we are only interested in non-loop carried constraints here we can
2982   // simply skip loop back edges.
2983
2984   SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
2985   ReversePostOrderTraversal<Region *> RTraversal(R);
2986   for (auto *RN : RTraversal) {
2987     // Recurse for affine subregions but go on for basic blocks and non-affine
2988     // subregions.
2989     if (RN->isSubRegion()) {
2990       Region *SubRegion = RN->getNodeAs<Region>();
2991       if (!isNonAffineSubRegion(SubRegion)) {
2992         if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
2993                                                InvalidDomainMap))
2994           return false;
2995         continue;
2996       }
2997     }
2998
2999     if (containsErrorBlock(RN, getRegion(), LI, DT))
3000       HasErrorBlock = true;
3001
3002     BasicBlock *BB = getRegionNodeBasicBlock(RN);
3003     TerminatorInst *TI = BB->getTerminator();
3004
3005     if (isa<UnreachableInst>(TI))
3006       continue;
3007
3008     isl::set Domain = DomainMap.lookup(BB);
3009     if (!Domain)
3010       continue;
3011     MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));
3012
3013     auto *BBLoop = getRegionNodeLoop(RN, LI);
3014     // Propagate the domain from BB directly to blocks that have a superset
3015     // domain, at the moment only region exit nodes of regions that start in BB.
3016     propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
3017                                            InvalidDomainMap);
3018
3019     // If all successors of BB have been set a domain through the propagation
3020     // above we do not need to build condition sets but can just skip this
3021     // block. However, it is important to note that this is a local property
3022     // with regards to the region @p R. To this end FinishedExitBlocks is a
3023     // local variable.
3024     auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
3025       return FinishedExitBlocks.count(SuccBB);
3026     };
3027     if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
3028       continue;
3029
3030     // Build the condition sets for the successor nodes of the current region
3031     // node. If it is a non-affine subregion we will always execute the single
3032     // exit node, hence the single entry node domain is the condition set. For
3033     // basic blocks we use the helper function buildConditionSets.
3034     SmallVector<isl_set *, 8> ConditionSets;
3035     if (RN->isSubRegion())
3036       ConditionSets.push_back(Domain.copy());
3037     else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
3038                                  InvalidDomainMap, ConditionSets))
3039       return false;
3040
3041     // Now iterate over the successors and set their initial domain based on
3042     // their condition set. We skip back edges here and have to be careful when
3043     // we leave a loop not to keep constraints over a dimension that doesn't
3044     // exist anymore.
3045     assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
3046     for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
3047       isl::set CondSet = isl::manage(ConditionSets[u]);
3048       BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
3049
3050       // Skip blocks outside the region.
3051       if (!contains(SuccBB))
3052         continue;
3053
3054       // If we propagate the domain of some block to "SuccBB" we do not have to
3055       // adjust the domain.
3056       if (FinishedExitBlocks.count(SuccBB))
3057         continue;
3058
3059       // Skip back edges.
3060       if (DT.dominates(SuccBB, BB))
3061         continue;
3062
3063       Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
3064
3065       CondSet = isl::manage(
3066           adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop));
3067
3068       // Set the domain for the successor or merge it with an existing domain in
3069       // case there are multiple paths (without loop back edges) to the
3070       // successor block.
3071       isl::set &SuccDomain = DomainMap[SuccBB];
3072
3073       if (SuccDomain) {
3074         SuccDomain = SuccDomain.unite(CondSet).coalesce();
3075       } else {
3076         // Initialize the invalid domain.
3077         InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
3078         SuccDomain = CondSet;
3079       }
3080
3081       SuccDomain = SuccDomain.detect_equalities();
3082
3083       // Check if the maximal number of domain disjunctions was reached.
3084       // In case this happens we will clean up and bail.
3085       if (isl_set_n_basic_set(SuccDomain.get()) < MaxDisjunctsInDomain)
3086         continue;
3087
3088       invalidate(COMPLEXITY, DebugLoc());
3089       while (++u < ConditionSets.size())
3090         isl_set_free(ConditionSets[u]);
3091       return false;
3092     }
3093   }
3094
3095   return true;
3096 }
3097
3098 isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
3099                                                DominatorTree &DT,
3100                                                LoopInfo &LI) {
3101   // If @p BB is the ScopEntry we are done
3102   if (R.getEntry() == BB)
3103     return isl::set::universe(Domain.get_space());
3104
3105   // The region info of this function.
3106   auto &RI = *R.getRegionInfo();
3107
3108   Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
3109
3110   // A domain to collect all predecessor domains, thus all conditions under
3111   // which the block is executed. To this end we start with the empty domain.
3112   isl::set PredDom = isl::set::empty(Domain.get_space());
3113
3114   // Set of regions of which the entry block domain has been propagated to BB.
3115   // all predecessors inside any of the regions can be skipped.
3116   SmallSet<Region *, 8> PropagatedRegions;
3117
3118   for (auto *PredBB : predecessors(BB)) {
3119     // Skip backedges.
3120     if (DT.dominates(BB, PredBB))
3121       continue;
3122
3123     // If the predecessor is in a region we used for propagation we can skip it.
3124     auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
3125     if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
3126                     PredBBInRegion)) {
3127       continue;
3128     }
3129
3130     // Check if there is a valid region we can use for propagation, thus look
3131     // for a region that contains the predecessor and has @p BB as exit block.
3132     auto *PredR = RI.getRegionFor(PredBB);
3133     while (PredR->getExit() != BB && !PredR->contains(BB))
3134       PredR->getParent();
3135
3136     // If a valid region for propagation was found use the entry of that region
3137     // for propagation, otherwise the PredBB directly.
3138     if (PredR->getExit() == BB) {
3139       PredBB = PredR->getEntry();
3140       PropagatedRegions.insert(PredR);
3141     }
3142
3143     auto *PredBBDom = getDomainConditions(PredBB).release();
3144     Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
3145
3146     PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
3147
3148     PredDom = PredDom.unite(isl::manage(PredBBDom));
3149   }
3150
3151   return PredDom;
3152 }
3153
3154 bool Scop::propagateDomainConstraints(
3155     Region *R, DominatorTree &DT, LoopInfo &LI,
3156     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3157   // Iterate over the region R and propagate the domain constrains from the
3158   // predecessors to the current node. In contrast to the
3159   // buildDomainsWithBranchConstraints function, this one will pull the domain
3160   // information from the predecessors instead of pushing it to the successors.
3161   // Additionally, we assume the domains to be already present in the domain
3162   // map here. However, we iterate again in reverse post order so we know all
3163   // predecessors have been visited before a block or non-affine subregion is
3164   // visited.
3165
3166   ReversePostOrderTraversal<Region *> RTraversal(R);
3167   for (auto *RN : RTraversal) {
3168     // Recurse for affine subregions but go on for basic blocks and non-affine
3169     // subregions.
3170     if (RN->isSubRegion()) {
3171       Region *SubRegion = RN->getNodeAs<Region>();
3172       if (!isNonAffineSubRegion(SubRegion)) {
3173         if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
3174           return false;
3175         continue;
3176       }
3177     }
3178
3179     BasicBlock *BB = getRegionNodeBasicBlock(RN);
3180     isl::set &Domain = DomainMap[BB];
3181     assert(Domain);
3182
3183     // Under the union of all predecessor conditions we can reach this block.
3184     isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
3185     Domain = Domain.intersect(PredDom).coalesce();
3186     Domain = Domain.align_params(getParamSpace());
3187
3188     Loop *BBLoop = getRegionNodeLoop(RN, LI);
3189     if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
3190       if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
3191         return false;
3192   }
3193
3194   return true;
3195 }
3196
3197 /// Create a map to map from a given iteration to a subsequent iteration.
3198 ///
3199 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3200 /// is incremented by one and all other dimensions are equal, e.g.,
3201 ///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3202 ///
3203 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3204 static __isl_give isl_map *
3205 createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
3206   auto *MapSpace = isl_space_map_from_set(SetSpace);
3207   auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
3208   for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
3209     if (u != Dim)
3210       NextIterationMap =
3211           isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
3212   auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
3213   C = isl_constraint_set_constant_si(C, 1);
3214   C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
3215   C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
3216   NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
3217   return NextIterationMap;
3218 }
3219
3220 bool Scop::addLoopBoundsToHeaderDomain(
3221     Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3222   int LoopDepth = getRelativeLoopDepth(L);
3223   assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
3224
3225   BasicBlock *HeaderBB = L->getHeader();
3226   assert(DomainMap.count(HeaderBB));
3227   isl::set &HeaderBBDom = DomainMap[HeaderBB];
3228
3229   isl::map NextIterationMap = isl::manage(
3230       createNextIterationMap(HeaderBBDom.get_space().release(), LoopDepth));
3231
3232   isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
3233
3234   SmallVector<BasicBlock *, 4> LatchBlocks;
3235   L->getLoopLatches(LatchBlocks);
3236
3237   for (BasicBlock *LatchBB : LatchBlocks) {
3238     // If the latch is only reachable via error statements we skip it.
3239     isl::set LatchBBDom = DomainMap.lookup(LatchBB);
3240     if (!LatchBBDom)
3241       continue;
3242
3243     isl::set BackedgeCondition = nullptr;
3244
3245     TerminatorInst *TI = LatchBB->getTerminator();
3246     BranchInst *BI = dyn_cast<BranchInst>(TI);
3247     assert(BI && "Only branch instructions allowed in loop latches");
3248
3249     if (BI->isUnconditional())
3250       BackedgeCondition = LatchBBDom;
3251     else {
3252       SmallVector<isl_set *, 8> ConditionSets;
3253       int idx = BI->getSuccessor(0) != HeaderBB;
3254       if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
3255                               InvalidDomainMap, ConditionSets))
3256         return false;
3257
3258       // Free the non back edge condition set as we do not need it.
3259       isl_set_free(ConditionSets[1 - idx]);
3260
3261       BackedgeCondition = isl::manage(ConditionSets[idx]);
3262     }
3263
3264     int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3265     assert(LatchLoopDepth >= LoopDepth);
3266     BackedgeCondition = BackedgeCondition.project_out(
3267         isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3268     UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3269   }
3270
3271   isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
3272   for (int i = 0; i < LoopDepth; i++)
3273     ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3274
3275   isl::set UnionBackedgeConditionComplement =
3276       UnionBackedgeCondition.complement();
3277   UnionBackedgeConditionComplement =
3278       UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
3279                                                       0);
3280   UnionBackedgeConditionComplement =
3281       UnionBackedgeConditionComplement.apply(ForwardMap);
3282   HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3283   HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3284
3285   auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth);
3286   HeaderBBDom = isl::manage(Parts.second);
3287
3288   // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3289   // the bounded assumptions to the context as they are already implied by the
3290   // <nsw> tag.
3291   if (Affinator.hasNSWAddRecForLoop(L)) {
3292     isl_set_free(Parts.first);
3293     return true;
3294   }
3295
3296   isl_set *UnboundedCtx = isl_set_params(Parts.first);
3297   recordAssumption(INFINITELOOP, UnboundedCtx,
3298                    HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3299   return true;
3300 }
3301
3302 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3303   Value *PointerBase = MA->getOriginalBaseAddr();
3304
3305   auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3306   if (!PointerBaseInst)
3307     return nullptr;
3308
3309   auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3310   if (!BasePtrStmt)
3311     return nullptr;
3312
3313   return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3314 }
3315
3316 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3317                                         isl::union_map Writes) {
3318   if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3319     return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3320   }
3321
3322   Value *BaseAddr = MA->getOriginalBaseAddr();
3323   if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3324     if (!isa<LoadInst>(BasePtrInst))
3325       return contains(BasePtrInst);
3326
3327   return false;
3328 }
3329
3330 bool Scop::buildAliasChecks(AliasAnalysis &AA) {
3331   if (!PollyUseRuntimeAliasChecks)
3332     return true;
3333
3334   if (buildAliasGroups(AA)) {
3335     // Aliasing assumptions do not go through addAssumption but we still want to
3336     // collect statistics so we do it here explicitly.
3337     if (MinMaxAliasGroups.size())
3338       AssumptionsAliasing++;
3339     return true;
3340   }
3341
3342   // If a problem occurs while building the alias groups we need to delete
3343   // this SCoP and pretend it wasn't valid in the first place. To this end
3344   // we make the assumed context infeasible.
3345   invalidate(ALIASING, DebugLoc());
3346
3347   DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3348                << " could not be created as the number of parameters involved "
3349                   "is too high. The SCoP will be "
3350                   "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3351                   "the maximal number of parameters but be advised that the "
3352                   "compile time might increase exponentially.\n\n");
3353   return false;
3354 }
3355
3356 std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3357 Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
3358   AliasSetTracker AST(AA);
3359
3360   DenseMap<Value *, MemoryAccess *> PtrToAcc;
3361   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3362   for (ScopStmt &Stmt : *this) {
3363
3364     isl_set *StmtDomain = Stmt.getDomain().release();
3365     bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
3366     isl_set_free(StmtDomain);
3367
3368     // Statements with an empty domain will never be executed.
3369     if (StmtDomainEmpty)
3370       continue;
3371
3372     for (MemoryAccess *MA : Stmt) {
3373       if (MA->isScalarKind())
3374         continue;
3375       if (!MA->isRead())
3376         HasWriteAccess.insert(MA->getScopArrayInfo());
3377       MemAccInst Acc(MA->getAccessInstruction());
3378       if (MA->isRead() && isa<MemTransferInst>(Acc))
3379         PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3380       else
3381         PtrToAcc[Acc.getPointerOperand()] = MA;
3382       AST.add(Acc);
3383     }
3384   }
3385
3386   AliasGroupVectorTy AliasGroups;
3387   for (AliasSet &AS : AST) {
3388     if (AS.isMustAlias() || AS.isForwardingAliasSet())
3389       continue;
3390     AliasGroupTy AG;
3391     for (auto &PR : AS)
3392       AG.push_back(PtrToAcc[PR.getValue()]);
3393     if (AG.size() < 2)
3394       continue;
3395     AliasGroups.push_back(std::move(AG));
3396   }
3397
3398   return std::make_tuple(AliasGroups, HasWriteAccess);
3399 }
3400
3401 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3402   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3403     AliasGroupTy NewAG;
3404     AliasGroupTy &AG = AliasGroups[u];
3405     AliasGroupTy::iterator AGI = AG.begin();
3406     isl_set *AGDomain = getAccessDomain(*AGI);
3407     while (AGI != AG.end()) {
3408       MemoryAccess *MA = *AGI;
3409       isl_set *MADomain = getAccessDomain(MA);
3410       if (isl_set_is_disjoint(AGDomain, MADomain)) {
3411         NewAG.push_back(MA);
3412         AGI = AG.erase(AGI);
3413         isl_set_free(MADomain);
3414       } else {
3415         AGDomain = isl_set_union(AGDomain, MADomain);
3416         AGI++;
3417       }
3418     }
3419     if (NewAG.size() > 1)
3420       AliasGroups.push_back(std::move(NewAG));
3421     isl_set_free(AGDomain);
3422   }
3423 }
3424
3425 bool Scop::buildAliasGroups(AliasAnalysis &AA) {
3426   // To create sound alias checks we perform the following steps:
3427   //   o) We partition each group into read only and non read only accesses.
3428   //   o) For each group with more than one base pointer we then compute minimal
3429   //      and maximal accesses to each array of a group in read only and non
3430   //      read only partitions separately.
3431   AliasGroupVectorTy AliasGroups;
3432   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3433
3434   std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
3435
3436   splitAliasGroupsByDomain(AliasGroups);
3437
3438   for (AliasGroupTy &AG : AliasGroups) {
3439     if (!hasFeasibleRuntimeContext())
3440       return false;
3441
3442     {
3443       IslMaxOperationsGuard MaxOpGuard(getIslCtx(), OptComputeOut);
3444       bool Valid = buildAliasGroup(AG, HasWriteAccess);
3445       if (!Valid)
3446         return false;
3447     }
3448     if (isl_ctx_last_error(getIslCtx()) == isl_error_quota) {
3449       invalidate(COMPLEXITY, DebugLoc());
3450       return false;
3451     }
3452   }
3453
3454   return true;
3455 }
3456
3457 bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
3458                            DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3459   AliasGroupTy ReadOnlyAccesses;
3460   AliasGroupTy ReadWriteAccesses;
3461   SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3462   SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3463
3464   if (AliasGroup.size() < 2)
3465     return true;
3466
3467   for (MemoryAccess *Access : AliasGroup) {
3468     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3469                                         Access->getAccessInstruction())
3470              << "Possibly aliasing pointer, use restrict keyword.");
3471     const ScopArrayInfo *Array = Access->getScopArrayInfo();
3472     if (HasWriteAccess.count(Array)) {
3473       ReadWriteArrays.insert(Array);
3474       ReadWriteAccesses.push_back(Access);
3475     } else {
3476       ReadOnlyArrays.insert(Array);
3477       ReadOnlyAccesses.push_back(Access);
3478     }
3479   }
3480
3481   // If there are no read-only pointers, and less than two read-write pointers,
3482   // no alias check is needed.
3483   if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3484     return true;
3485
3486   // If there is no read-write pointer, no alias check is needed.
3487   if (ReadWriteArrays.empty())
3488     return true;
3489
3490   // For non-affine accesses, no alias check can be generated as we cannot
3491   // compute a sufficiently tight lower and upper bound: bail out.
3492   for (MemoryAccess *MA : AliasGroup) {
3493     if (!MA->isAffine()) {
3494       invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3495                  MA->getAccessInstruction()->getParent());
3496       return false;
3497     }
3498   }
3499
3500   // Ensure that for all memory accesses for which we generate alias checks,
3501   // their base pointers are available.
3502   for (MemoryAccess *MA : AliasGroup) {
3503     if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
3504       addRequiredInvariantLoad(
3505           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3506   }
3507
3508   MinMaxAliasGroups.emplace_back();
3509   MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
3510   MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
3511   MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
3512
3513   bool Valid;
3514
3515   Valid =
3516       calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
3517
3518   if (!Valid)
3519     return false;
3520
3521   // Bail out if the number of values we need to compare is too large.
3522   // This is important as the number of comparisons grows quadratically with
3523   // the number of values we need to compare.
3524   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3525       RunTimeChecksMaxArraysPerGroup)
3526     return false;
3527
3528   Valid =
3529       calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
3530
3531   if (!Valid)
3532     return false;
3533
3534   return true;
3535 }
3536
3537 /// Get the smallest loop that contains @p S but is not in @p S.
3538 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3539   // Start with the smallest loop containing the entry and expand that
3540   // loop until it contains all blocks in the region. If there is a loop
3541   // containing all blocks in the region check if it is itself contained
3542   // and if so take the parent loop as it will be the smallest containing
3543   // the region but not contained by it.
3544   Loop *L = LI.getLoopFor(S.getEntry());
3545   while (L) {
3546     bool AllContained = true;
3547     for (auto *BB : S.blocks())
3548       AllContained &= L->contains(BB);
3549     if (AllContained)
3550       break;
3551     L = L->getParentLoop();
3552   }
3553
3554   return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
3555 }
3556
3557 int Scop::NextScopID = 0;
3558
3559 std::string Scop::CurrentFunc;
3560
3561 int Scop::getNextID(std::string ParentFunc) {
3562   if (ParentFunc != CurrentFunc) {
3563     CurrentFunc = ParentFunc;
3564     NextScopID = 0;
3565   }
3566   return NextScopID++;
3567 }
3568
3569 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3570            ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE)
3571     : SE(&ScalarEvolution), R(R), name(R.getNameStr()),
3572       HasSingleExitEdge(R.getExitingBlock()), DC(DC), ORE(ORE),
3573       IslCtx(isl_ctx_alloc(), isl_ctx_free), Affinator(this, LI),
3574       ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
3575   if (IslOnErrorAbort)
3576     isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
3577   buildContext();
3578 }
3579
3580 Scop::~Scop() {
3581   isl_set_free(Context);
3582   isl_set_free(AssumedContext);
3583   isl_set_free(InvalidContext);
3584   isl_schedule_free(Schedule);
3585
3586   ParameterIds.clear();
3587
3588   for (auto &AS : RecordedAssumptions)
3589     isl_set_free(AS.Set);
3590
3591   // Free the alias groups
3592   for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
3593     for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
3594       isl_pw_multi_aff_free(MMA.first);
3595       isl_pw_multi_aff_free(MMA.second);
3596     }
3597     for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
3598       isl_pw_multi_aff_free(MMA.first);
3599       isl_pw_multi_aff_free(MMA.second);
3600     }
3601   }
3602
3603   for (const auto &IAClass : InvariantEquivClasses)
3604     isl_set_free(IAClass.ExecutionContext);
3605
3606   // Explicitly release all Scop objects and the underlying isl objects before
3607   // we release the isl context.
3608   Stmts.clear();
3609   ScopArrayInfoSet.clear();
3610   ScopArrayInfoMap.clear();
3611   ScopArrayNameMap.clear();
3612   AccessFunctions.clear();
3613 }
3614
3615 void Scop::foldSizeConstantsToRight() {
3616   isl_union_set *Accessed = isl_union_map_range(getAccesses().release());
3617
3618   for (auto Array : arrays()) {
3619     if (Array->getNumberOfDimensions() <= 1)
3620       continue;
3621
3622     isl_space *Space = Array->getSpace().release();
3623
3624     Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
3625
3626     if (!isl_union_set_contains(Accessed, Space)) {
3627       isl_space_free(Space);
3628       continue;
3629     }
3630
3631     isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
3632
3633     isl_map *Transform =
3634         isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));
3635
3636     std::vector<int> Int;
3637
3638     int Dims = isl_set_dim(Elements, isl_dim_set);
3639     for (int i = 0; i < Dims; i++) {
3640       isl_set *DimOnly =
3641           isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3642       DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3643       DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3644
3645       isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
3646
3647       if (i == Dims - 1) {
3648         Int.push_back(1);
3649         Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3650         isl_basic_set_free(DimHull);
3651         continue;
3652       }
3653
3654       if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3655         isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3656         isl_val *Val = isl_aff_get_denominator_val(Diff);
3657         isl_aff_free(Diff);
3658
3659         int ValInt = 1;
3660
3661         if (isl_val_is_int(Val))
3662           ValInt = isl_val_get_num_si(Val);
3663         isl_val_free(Val);
3664
3665         Int.push_back(ValInt);
3666
3667         isl_constraint *C = isl_constraint_alloc_equality(
3668             isl_local_space_from_space(isl_map_get_space(Transform)));
3669         C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3670         C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3671         Transform = isl_map_add_constraint(Transform, C);
3672         isl_basic_set_free(DimHull);
3673         continue;
3674       }
3675
3676       isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3677       ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3678
3679       int ValInt = 1;
3680       if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3681         ValInt = 0;
3682       }
3683
3684       Int.push_back(ValInt);
3685       Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3686       isl_basic_set_free(DimHull);
3687       isl_basic_set_free(ZeroSet);
3688     }
3689
3690     isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3691
3692     if (!isl_set_is_subset(Elements, MappedElements)) {
3693       isl_set_free(Elements);
3694       isl_set_free(MappedElements);
3695       isl_map_free(Transform);
3696       continue;
3697     }
3698
3699     isl_set_free(MappedElements);
3700
3701     bool CanFold = true;
3702
3703     if (Int[0] <= 1)
3704       CanFold = false;
3705
3706     unsigned NumDims = Array->getNumberOfDimensions();
3707     for (unsigned i = 1; i < NumDims - 1; i++)
3708       if (Int[0] != Int[i] && Int[i])
3709         CanFold = false;
3710
3711     if (!CanFold) {
3712       isl_set_free(Elements);
3713       isl_map_free(Transform);
3714       continue;
3715     }
3716
3717     for (auto &Access : AccessFunctions)
3718       if (Access->getScopArrayInfo() == Array)
3719         Access->setAccessRelation(Access->getAccessRelation().apply_range(
3720             isl::manage(isl_map_copy(Transform))));
3721
3722     isl_map_free(Transform);
3723
3724     std::vector<const SCEV *> Sizes;
3725     for (unsigned i = 0; i < NumDims; i++) {
3726       auto Size = Array->getDimensionSize(i);
3727
3728       if (i == NumDims - 1)
3729         Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3730       Sizes.push_back(Size);
3731     }
3732
3733     Array->updateSizes(Sizes, false /* CheckConsistency */);
3734
3735     isl_set_free(Elements);
3736   }
3737   isl_union_set_free(Accessed);
3738 }
3739
3740 void Scop::markFortranArrays() {
3741   for (ScopStmt &Stmt : Stmts) {
3742     for (MemoryAccess *MemAcc : Stmt) {
3743       Value *FAD = MemAcc->getFortranArrayDescriptor();
3744       if (!FAD)
3745         continue;
3746
3747       // TODO: const_cast-ing to edit
3748       ScopArrayInfo *SAI =
3749           const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3750       assert(SAI && "memory access into a Fortran array does not "
3751                     "have an associated ScopArrayInfo");
3752       SAI->applyAndSetFAD(FAD);
3753     }
3754   }
3755 }
3756
3757 void Scop::finalizeAccesses() {
3758   updateAccessDimensionality();
3759   foldSizeConstantsToRight();
3760   foldAccessRelations();
3761   assumeNoOutOfBounds();
3762   markFortranArrays();
3763 }
3764
3765 void Scop::updateAccessDimensionality() {
3766   // Check all array accesses for each base pointer and find a (virtual) element
3767   // size for the base pointer that divides all access functions.
3768   for (ScopStmt &Stmt : *this)
3769     for (MemoryAccess *Access : Stmt) {
3770       if (!Access->isArrayKind())
3771         continue;
3772       ScopArrayInfo *Array =
3773           const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3774
3775       if (Array->getNumberOfDimensions() != 1)
3776         continue;
3777       unsigned DivisibleSize = Array->getElemSizeInBytes();
3778       const SCEV *Subscript = Access->getSubscript(0);
3779       while (!isDivisible(Subscript, DivisibleSize, *SE))
3780         DivisibleSize /= 2;
3781       auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3782       Array->updateElementType(Ty);
3783     }
3784
3785   for (auto &Stmt : *this)
3786     for (auto &Access : Stmt)
3787       Access->updateDimensionality();
3788 }
3789
3790 void Scop::foldAccessRelations() {
3791   for (auto &Stmt : *this)
3792     for (auto &Access : Stmt)
3793       Access->foldAccessRelation();
3794 }
3795
3796 void Scop::assumeNoOutOfBounds() {
3797   for (auto &Stmt : *this)
3798     for (auto &Access : Stmt)
3799       Access->assumeNoOutOfBound();
3800 }
3801
3802 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3803   if (Stmt.isRegionStmt())
3804     for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
3805       StmtMap.erase(BB);
3806       for (Instruction &Inst : *BB)
3807         InstStmtMap.erase(&Inst);
3808     }
3809   else {
3810     StmtMap.erase(Stmt.getBasicBlock());
3811     for (Instruction *Inst : Stmt.getInstructions())
3812       InstStmtMap.erase(Inst);
3813   }
3814 }
3815
3816 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
3817   for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3818     if (!ShouldDelete(*StmtIt)) {
3819       StmtIt++;
3820       continue;
3821     }
3822
3823     removeFromStmtMap(*StmtIt);
3824     StmtIt = Stmts.erase(StmtIt);
3825   }
3826 }
3827
3828 void Scop::removeStmtNotInDomainMap() {
3829   auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3830     return !this->DomainMap.lookup(Stmt.getEntryBlock());
3831   };
3832   removeStmts(ShouldDelete);
3833 }
3834
3835 void Scop::simplifySCoP(bool AfterHoisting) {
3836   auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3837     bool RemoveStmt = Stmt.isEmpty();
3838
3839     // Remove read only statements only after invariant load hoisting.
3840     if (!RemoveStmt && AfterHoisting) {
3841       bool OnlyRead = true;
3842       for (MemoryAccess *MA : Stmt) {
3843         if (MA->isRead())
3844           continue;
3845
3846         OnlyRead = false;
3847         break;
3848       }
3849
3850       RemoveStmt = OnlyRead;
3851     }
3852     return RemoveStmt;
3853   };
3854
3855   removeStmts(ShouldDelete);
3856 }
3857
3858 InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3859   LoadInst *LInst = dyn_cast<LoadInst>(Val);
3860   if (!LInst)
3861     return nullptr;
3862
3863   if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3864     LInst = cast<LoadInst>(Rep);
3865
3866   Type *Ty = LInst->getType();
3867   const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3868   for (auto &IAClass : InvariantEquivClasses) {
3869     if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3870       continue;
3871
3872     auto &MAs = IAClass.InvariantAccesses;
3873     for (auto *MA : MAs)
3874       if (MA->getAccessInstruction() == Val)
3875         return &IAClass;
3876   }
3877
3878   return nullptr;
3879 }
3880
3881 bool isAParameter(llvm::Value *maybeParam, const Function &F) {
3882   for (const llvm::Argument &Arg : F.args())
3883     if (&Arg == maybeParam)
3884       return true;
3885
3886   return false;
3887 }
3888
3889 bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3890                               bool MAInvalidCtxIsEmpty,
3891                               bool NonHoistableCtxIsEmpty) {
3892   LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3893   const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
3894   if (PollyAllowDereferenceOfAllFunctionParams &&
3895       isAParameter(LInst->getPointerOperand(), getFunction()))
3896     return true;
3897
3898   // TODO: We can provide more information for better but more expensive
3899   //       results.
3900   if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3901                                           LInst->getAlignment(), DL))
3902     return false;
3903
3904   // If the location might be overwritten we do not hoist it unconditionally.
3905   //
3906   // TODO: This is probably too conservative.
3907   if (!NonHoistableCtxIsEmpty)
3908     return false;
3909
3910   // If a dereferenceable load is in a statement that is modeled precisely we
3911   // can hoist it.
3912   if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3913     return true;
3914
3915   // Even if the statement is not modeled precisely we can hoist the load if it
3916   // does not involve any parameters that might have been specialized by the
3917   // statement domain.
3918   for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3919     if (!isa<SCEVConstant>(MA->getSubscript(u)))
3920       return false;
3921   return true;
3922 }
3923
3924 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3925   if (InvMAs.empty())
3926     return;
3927
3928   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3929   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3930
3931   // Get the context under which the statement is executed but remove the error
3932   // context under which this statement is reached.
3933   isl::set DomainCtx = Stmt.getDomain().params();
3934   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
3935
3936   if (isl_set_n_basic_set(DomainCtx.get()) >= MaxDisjunctsInDomain) {
3937     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3938     invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3939     return;
3940   }
3941
3942   // Project out all parameters that relate to loads in the statement. Otherwise
3943   // we could have cyclic dependences on the constraints under which the
3944   // hoisted loads are executed and we could not determine an order in which to
3945   // pre-load them. This happens because not only lower bounds are part of the
3946   // domain but also upper bounds.
3947   for (auto &InvMA : InvMAs) {
3948     auto *MA = InvMA.MA;
3949     Instruction *AccInst = MA->getAccessInstruction();
3950     if (SE->isSCEVable(AccInst->getType())) {
3951       SetVector<Value *> Values;
3952       for (const SCEV *Parameter : Parameters) {
3953         Values.clear();
3954         findValues(Parameter, *SE, Values);
3955         if (!Values.count(AccInst))
3956           continue;
3957
3958         if (isl::id ParamId = getIdForParam(Parameter)) {
3959           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3960           if (Dim >= 0)
3961             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3962         }
3963       }
3964     }
3965   }
3966
3967   for (auto &InvMA : InvMAs) {
3968     auto *MA = InvMA.MA;
3969     isl::set NHCtx = InvMA.NonHoistableCtx;
3970
3971     // Check for another invariant access that accesses the same location as
3972     // MA and if found consolidate them. Otherwise create a new equivalence
3973     // class at the end of InvariantEquivClasses.
3974     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3975     Type *Ty = LInst->getType();
3976     const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3977
3978     isl::set MAInvalidCtx = MA->getInvalidContext();
3979     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3980     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3981
3982     isl::set MACtx;
3983     // Check if we know that this pointer can be speculatively accessed.
3984     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3985                            NonHoistableCtxIsEmpty)) {
3986       MACtx = isl::set::universe(DomainCtx.get_space());
3987     } else {
3988       MACtx = DomainCtx;
3989       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3990       MACtx = MACtx.gist_params(getContext());
3991     }
3992
3993     bool Consolidated = false;
3994     for (auto &IAClass : InvariantEquivClasses) {
3995       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3996         continue;
3997
3998       // If the pointer and the type is equal check if the access function wrt.
3999       // to the domain is equal too. It can happen that the domain fixes
4000       // parameter values and these can be different for distinct part of the
4001       // SCoP. If this happens we cannot consolidate the loads but need to
4002       // create a new invariant load equivalence class.
4003       auto &MAs = IAClass.InvariantAccesses;
4004       if (!MAs.empty()) {
4005         auto *LastMA = MAs.front();
4006
4007         isl::set AR = MA->getAccessRelation().range();
4008         isl::set LastAR = LastMA->getAccessRelation().range();
4009         bool SameAR = AR.is_equal(LastAR);
4010
4011         if (!SameAR)
4012           continue;
4013       }
4014
4015       // Add MA to the list of accesses that are in this class.
4016       MAs.push_front(MA);
4017
4018       Consolidated = true;
4019
4020       // Unify the execution context of the class and this statement.
4021       isl::set IAClassDomainCtx = isl::manage(IAClass.ExecutionContext);
4022       if (IAClassDomainCtx)
4023         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
4024       else
4025         IAClassDomainCtx = MACtx;
4026       IAClass.ExecutionContext = IAClassDomainCtx.release();
4027       break;
4028     }
4029
4030     if (Consolidated)
4031       continue;
4032
4033     // If we did not consolidate MA, thus did not find an equivalence class
4034     // for it, we create a new one.
4035     InvariantEquivClasses.emplace_back(InvariantEquivClassTy{
4036         PointerSCEV, MemoryAccessList{MA}, MACtx.release(), Ty});
4037   }
4038 }
4039
4040 /// Check if an access range is too complex.
4041 ///
4042 /// An access range is too complex, if it contains either many disjuncts or
4043 /// very complex expressions. As a simple heuristic, we assume if a set to
4044 /// be too complex if the sum of existentially quantified dimensions and
4045 /// set dimensions is larger than a threshold. This reliably detects both
4046 /// sets with many disjuncts as well as sets with many divisions as they
4047 /// arise in h264.
4048 ///
4049 /// @param AccessRange The range to check for complexity.
4050 ///
4051 /// @returns True if the access range is too complex.
4052 static bool isAccessRangeTooComplex(isl::set AccessRange) {
4053   unsigned NumTotalDims = 0;
4054
4055   auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
4056     NumTotalDims += BSet.dim(isl::dim::div);
4057     NumTotalDims += BSet.dim(isl::dim::set);
4058     return isl::stat::ok;
4059   };
4060
4061   AccessRange.foreach_basic_set(CountDimensions);
4062
4063   if (NumTotalDims > MaxDimensionsInAccessRange)
4064     return true;
4065
4066   return false;
4067 }
4068
4069 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
4070   // TODO: Loads that are not loop carried, hence are in a statement with
4071   //       zero iterators, are by construction invariant, though we
4072   //       currently "hoist" them anyway. This is necessary because we allow
4073   //       them to be treated as parameters (e.g., in conditions) and our code
4074   //       generation would otherwise use the old value.
4075
4076   auto &Stmt = *Access->getStatement();
4077   BasicBlock *BB = Stmt.getEntryBlock();
4078
4079   if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
4080       Access->isMemoryIntrinsic())
4081     return nullptr;
4082
4083   // Skip accesses that have an invariant base pointer which is defined but
4084   // not loaded inside the SCoP. This can happened e.g., if a readnone call
4085   // returns a pointer that is used as a base address. However, as we want
4086   // to hoist indirect pointers, we allow the base pointer to be defined in
4087   // the region if it is also a memory access. Each ScopArrayInfo object
4088   // that has a base pointer origin has a base pointer that is loaded and
4089   // that it is invariant, thus it will be hoisted too. However, if there is
4090   // no base pointer origin we check that the base pointer is defined
4091   // outside the region.
4092   auto *LI = cast<LoadInst>(Access->getAccessInstruction());
4093   if (hasNonHoistableBasePtrInScop(Access, Writes))
4094     return nullptr;
4095
4096   isl::map AccessRelation = give(Access->getAccessRelation().release());
4097   assert(!AccessRelation.is_empty());
4098
4099   if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
4100     return nullptr;
4101
4102   AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
4103   isl::set SafeToLoad;
4104
4105   auto &DL = getFunction().getParent()->getDataLayout();
4106   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
4107                                   DL)) {
4108     SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
4109   } else if (BB != LI->getParent()) {
4110     // Skip accesses in non-affine subregions as they might not be executed
4111     // under the same condition as the entry of the non-affine subregion.
4112     return nullptr;
4113   } else {
4114     SafeToLoad = AccessRelation.range();
4115   }
4116
4117   if (isAccessRangeTooComplex(AccessRelation.range()))
4118     return nullptr;
4119
4120   isl::union_map Written = Writes.intersect_range(SafeToLoad);
4121   isl::set WrittenCtx = Written.params();
4122   bool IsWritten = !WrittenCtx.is_empty();
4123
4124   if (!IsWritten)
4125     return WrittenCtx;
4126
4127   WrittenCtx = WrittenCtx.remove_divs();
4128   bool TooComplex =
4129       isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
4130   if (TooComplex || !isRequiredInvariantLoad(LI))
4131     return nullptr;
4132
4133   addAssumption(INVARIANTLOAD, WrittenCtx.copy(), LI->getDebugLoc(),
4134                 AS_RESTRICTION, LI->getParent());
4135   return WrittenCtx;
4136 }
4137
4138 void Scop::verifyInvariantLoads() {
4139   auto &RIL = getRequiredInvariantLoads();
4140   for (LoadInst *LI : RIL) {
4141     assert(LI && contains(LI));
4142     // If there exists a statement in the scop which has a memory access for
4143     // @p LI, then mark this scop as infeasible for optimization.
4144     for (ScopStmt &Stmt : Stmts)
4145       if (Stmt.getArrayAccessOrNULLFor(LI)) {
4146         invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
4147         return;
4148       }
4149   }
4150 }
4151
4152 void Scop::hoistInvariantLoads() {
4153   if (!PollyInvariantLoadHoisting)
4154     return;
4155
4156   isl::union_map Writes = getWrites();
4157   for (ScopStmt &Stmt : *this) {
4158     InvariantAccessesTy InvariantAccesses;
4159
4160     for (MemoryAccess *Access : Stmt)
4161       if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
4162         InvariantAccesses.push_back({Access, NHCtx});
4163
4164     // Transfer the memory access from the statement to the SCoP.
4165     for (auto InvMA : InvariantAccesses)
4166       Stmt.removeMemoryAccess(InvMA.MA);
4167     addInvariantLoads(Stmt, InvariantAccesses);
4168   }
4169 }
4170
4171 /// Find the canonical scop array info object for a set of invariant load
4172 /// hoisted loads. The canonical array is the one that corresponds to the
4173 /// first load in the list of accesses which is used as base pointer of a
4174 /// scop array.
4175 static const ScopArrayInfo *findCanonicalArray(Scop *S,
4176                                                MemoryAccessList &Accesses) {
4177   for (MemoryAccess *Access : Accesses) {
4178     const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
4179         Access->getAccessInstruction(), MemoryKind::Array);
4180     if (CanonicalArray)
4181       return CanonicalArray;
4182   }
4183   return nullptr;
4184 }
4185
4186 /// Check if @p Array severs as base array in an invariant load.
4187 static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
4188   for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
4189     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
4190       if (Access2->getScopArrayInfo() == Array)
4191         return true;
4192   return false;
4193 }
4194
4195 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4196 /// with a reference to @p New.
4197 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
4198                                  const ScopArrayInfo *New) {
4199   for (ScopStmt &Stmt : *S)
4200     for (MemoryAccess *Access : Stmt) {
4201       if (Access->getLatestScopArrayInfo() != Old)
4202         continue;
4203
4204       isl::id Id = New->getBasePtrId();
4205       isl::map Map = Access->getAccessRelation();
4206       Map = Map.set_tuple_id(isl::dim::out, Id);
4207       Access->setAccessRelation(Map);
4208     }
4209 }
4210
4211 void Scop::canonicalizeDynamicBasePtrs() {
4212   for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
4213     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
4214
4215     const ScopArrayInfo *CanonicalBasePtrSAI =
4216         findCanonicalArray(this, BasePtrAccesses);
4217
4218     if (!CanonicalBasePtrSAI)
4219       continue;
4220
4221     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
4222       const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
4223           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
4224       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
4225           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
4226         continue;
4227
4228       // we currently do not canonicalize arrays where some accesses are
4229       // hoisted as invariant loads. If we would, we need to update the access
4230       // function of the invariant loads as well. However, as this is not a
4231       // very common situation, we leave this for now to avoid further
4232       // complexity increases.
4233       if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
4234         continue;
4235
4236       replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
4237     }
4238   }
4239 }
4240
4241 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
4242                                               ArrayRef<const SCEV *> Sizes,
4243                                               MemoryKind Kind,
4244                                               const char *BaseName) {
4245   assert((BasePtr || BaseName) &&
4246          "BasePtr and BaseName can not be nullptr at the same time.");
4247   assert(!(BasePtr && BaseName) && "BaseName is redundant.");
4248   auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
4249                       : ScopArrayNameMap[BaseName];
4250   if (!SAI) {
4251     auto &DL = getFunction().getParent()->getDataLayout();
4252     SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
4253                                 DL, this, BaseName));
4254     ScopArrayInfoSet.insert(SAI.get());
4255   } else {
4256     SAI->updateElementType(ElementType);
4257     // In case of mismatching array sizes, we bail out by setting the run-time
4258     // context to false.
4259     if (!SAI->updateSizes(Sizes))
4260       invalidate(DELINEARIZATION, DebugLoc());
4261   }
4262   return SAI.get();
4263 }
4264
4265 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
4266                                          const std::string &BaseName,
4267                                          const std::vector<unsigned> &Sizes) {
4268   auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
4269   std::vector<const SCEV *> SCEVSizes;
4270
4271   for (auto size : Sizes)
4272     if (size)
4273       SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
4274     else
4275       SCEVSizes.push_back(nullptr);
4276
4277   auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
4278                                        MemoryKind::Array, BaseName.c_str());
4279   return SAI;
4280 }
4281
4282 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
4283                                                   MemoryKind Kind) {
4284   auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
4285   return SAI;
4286 }
4287
4288 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
4289   auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
4290   assert(SAI && "No ScopArrayInfo available for this base pointer");
4291   return SAI;
4292 }
4293
4294 std::string Scop::getContextStr() const { return getContext().to_str(); }
4295
4296 std::string Scop::getAssumedContextStr() const {
4297   assert(AssumedContext && "Assumed context not yet built");
4298   return stringFromIslObj(AssumedContext);
4299 }
4300
4301 std::string Scop::getInvalidContextStr() const {
4302   return stringFromIslObj(InvalidContext);
4303 }
4304
4305 std::string Scop::getNameStr() const {
4306   std::string ExitName, EntryName;
4307   std::tie(EntryName, ExitName) = getEntryExitStr();
4308   return EntryName + "---" + ExitName;
4309 }
4310
4311 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4312   std::string ExitName, EntryName;
4313   raw_string_ostream ExitStr(ExitName);
4314   raw_string_ostream EntryStr(EntryName);
4315
4316   R.getEntry()->printAsOperand(EntryStr, false);
4317   EntryStr.str();
4318
4319   if (R.getExit()) {
4320     R.getExit()->printAsOperand(ExitStr, false);
4321     ExitStr.str();
4322   } else
4323     ExitName = "FunctionExit";
4324
4325   return std::make_pair(EntryName, ExitName);
4326 }
4327
4328 isl::set Scop::getContext() const { return isl::manage(isl_set_copy(Context)); }
4329 isl::space Scop::getParamSpace() const { return getContext().get_space(); }
4330
4331 isl::space Scop::getFullParamSpace() const {
4332   std::vector<isl::id> FortranIDs;
4333   FortranIDs = getFortranArrayIds(arrays());
4334
4335   isl::space Space = isl::space::params_alloc(
4336       getIslCtx(), ParameterIds.size() + FortranIDs.size());
4337
4338   unsigned PDim = 0;
4339   for (const SCEV *Parameter : Parameters) {
4340     isl::id Id = getIdForParam(Parameter);
4341     Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4342   }
4343
4344   for (isl::id Id : FortranIDs)
4345     Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4346
4347   return Space;
4348 }
4349
4350 isl::set Scop::getAssumedContext() const {
4351   assert(AssumedContext && "Assumed context not yet built");
4352   return isl::manage(isl_set_copy(AssumedContext));
4353 }
4354
4355 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4356   if (PollyProcessUnprofitable)
4357     return true;
4358
4359   if (isEmpty())
4360     return false;
4361
4362   unsigned OptimizableStmtsOrLoops = 0;
4363   for (auto &Stmt : *this) {
4364     if (Stmt.getNumIterators() == 0)
4365       continue;
4366
4367     bool ContainsArrayAccs = false;
4368     bool ContainsScalarAccs = false;
4369     for (auto *MA : Stmt) {
4370       if (MA->isRead())
4371         continue;
4372       ContainsArrayAccs |= MA->isLatestArrayKind();
4373       ContainsScalarAccs |= MA->isLatestScalarKind();
4374     }
4375
4376     if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4377       OptimizableStmtsOrLoops += Stmt.getNumIterators();
4378   }
4379
4380   return OptimizableStmtsOrLoops > 1;
4381 }
4382
4383 bool Scop::hasFeasibleRuntimeContext() const {
4384   auto *PositiveContext = getAssumedContext().release();
4385   auto *NegativeContext = getInvalidContext().release();
4386   PositiveContext =
4387       addNonEmptyDomainConstraints(isl::manage(PositiveContext)).release();
4388   bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
4389                       isl_set_is_subset(PositiveContext, NegativeContext));
4390   isl_set_free(PositiveContext);
4391   if (!IsFeasible) {
4392     isl_set_free(NegativeContext);
4393     return false;
4394   }
4395
4396   auto *DomainContext = isl_union_set_params(getDomains().release());
4397   IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
4398   IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
4399   isl_set_free(NegativeContext);
4400   isl_set_free(DomainContext);
4401
4402   return IsFeasible;
4403 }
4404
4405 static std::string toString(AssumptionKind Kind) {
4406   switch (Kind) {
4407   case ALIASING:
4408     return "No-aliasing";
4409   case INBOUNDS:
4410     return "Inbounds";
4411   case WRAPPING:
4412     return "No-overflows";
4413   case UNSIGNED:
4414     return "Signed-unsigned";
4415   case COMPLEXITY:
4416     return "Low complexity";
4417   case PROFITABLE:
4418     return "Profitable";
4419   case ERRORBLOCK:
4420     return "No-error";
4421   case INFINITELOOP:
4422     return "Finite loop";
4423   case INVARIANTLOAD:
4424     return "Invariant load";
4425   case DELINEARIZATION:
4426     return "Delinearization";
4427   }
4428   llvm_unreachable("Unknown AssumptionKind!");
4429 }
4430
4431 bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
4432   if (Sign == AS_ASSUMPTION) {
4433     if (isl_set_is_subset(Context, Set))
4434       return false;
4435
4436     if (isl_set_is_subset(AssumedContext, Set))
4437       return false;
4438   } else {
4439     if (isl_set_is_disjoint(Set, Context))
4440       return false;
4441
4442     if (isl_set_is_subset(Set, InvalidContext))
4443       return false;
4444   }
4445   return true;
4446 }
4447
4448 bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
4449                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4450   if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
4451     return false;
4452
4453   // Do never emit trivial assumptions as they only clutter the output.
4454   if (!PollyRemarksMinimal) {
4455     isl_set *Univ = nullptr;
4456     if (Sign == AS_ASSUMPTION)
4457       Univ = isl_set_universe(isl_set_get_space(Set));
4458
4459     bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
4460                      (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
4461     isl_set_free(Univ);
4462
4463     if (IsTrivial)
4464       return false;
4465   }
4466
4467   switch (Kind) {
4468   case ALIASING:
4469     AssumptionsAliasing++;
4470     break;
4471   case INBOUNDS:
4472     AssumptionsInbounds++;
4473     break;
4474   case WRAPPING:
4475     AssumptionsWrapping++;
4476     break;
4477   case UNSIGNED:
4478     AssumptionsUnsigned++;
4479     break;
4480   case COMPLEXITY:
4481     AssumptionsComplexity++;
4482     break;
4483   case PROFITABLE:
4484     AssumptionsUnprofitable++;
4485     break;
4486   case ERRORBLOCK:
4487     AssumptionsErrorBlock++;
4488     break;
4489   case INFINITELOOP:
4490     AssumptionsInfiniteLoop++;
4491     break;
4492   case INVARIANTLOAD:
4493     AssumptionsInvariantLoad++;
4494     break;
4495   case DELINEARIZATION:
4496     AssumptionsDelinearization++;
4497     break;
4498   }
4499
4500   auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4501   std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
4502   if (BB)
4503     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
4504              << Msg);
4505   else
4506     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
4507                                         R.getEntry())
4508              << Msg);
4509   return true;
4510 }
4511
4512 void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4513                          DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4514   // Simplify the assumptions/restrictions first.
4515   Set = isl_set_gist_params(Set, getContext().release());
4516
4517   if (!trackAssumption(Kind, Set, Loc, Sign, BB)) {
4518     isl_set_free(Set);
4519     return;
4520   }
4521
4522   if (Sign == AS_ASSUMPTION) {
4523     AssumedContext = isl_set_intersect(AssumedContext, Set);
4524     AssumedContext = isl_set_coalesce(AssumedContext);
4525   } else {
4526     InvalidContext = isl_set_union(InvalidContext, Set);
4527     InvalidContext = isl_set_coalesce(InvalidContext);
4528   }
4529 }
4530
4531 void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4532                             DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4533   assert((isl_set_is_params(Set) || BB) &&
4534          "Assumptions without a basic block must be parameter sets");
4535   RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
4536 }
4537
4538 void Scop::addRecordedAssumptions() {
4539   while (!RecordedAssumptions.empty()) {
4540     const Assumption &AS = RecordedAssumptions.pop_back_val();
4541
4542     if (!AS.BB) {
4543       addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
4544       continue;
4545     }
4546
4547     // If the domain was deleted the assumptions are void.
4548     isl_set *Dom = getDomainConditions(AS.BB).release();
4549     if (!Dom) {
4550       isl_set_free(AS.Set);
4551       continue;
4552     }
4553
4554     // If a basic block was given use its domain to simplify the assumption.
4555     // In case of restrictions we know they only have to hold on the domain,
4556     // thus we can intersect them with the domain of the block. However, for
4557     // assumptions the domain has to imply them, thus:
4558     //                     _              _____
4559     //   Dom => S   <==>   A v B   <==>   A - B
4560     //
4561     // To avoid the complement we will register A - B as a restriction not an
4562     // assumption.
4563     isl_set *S = AS.Set;
4564     if (AS.Sign == AS_RESTRICTION)
4565       S = isl_set_params(isl_set_intersect(S, Dom));
4566     else /* (AS.Sign == AS_ASSUMPTION) */
4567       S = isl_set_params(isl_set_subtract(Dom, S));
4568
4569     addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION, AS.BB);
4570   }
4571 }
4572
4573 void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
4574   addAssumption(Kind, isl_set_empty(getParamSpace().release()), Loc,
4575                 AS_ASSUMPTION, BB);
4576 }
4577
4578 isl::set Scop::getInvalidContext() const {
4579   return isl::manage(isl_set_copy(InvalidContext));
4580 }
4581
4582 void Scop::printContext(raw_ostream &OS) const {
4583   OS << "Context:\n";
4584   OS.indent(4) << Context << "\n";
4585
4586   OS.indent(4) << "Assumed Context:\n";
4587   OS.indent(4) << AssumedContext << "\n";
4588
4589   OS.indent(4) << "Invalid Context:\n";
4590   OS.indent(4) << InvalidContext << "\n";
4591
4592   unsigned Dim = 0;
4593   for (const SCEV *Parameter : Parameters)
4594     OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4595 }
4596
4597 void Scop::printAliasAssumptions(raw_ostream &OS) const {
4598   int noOfGroups = 0;
4599   for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4600     if (Pair.second.size() == 0)
4601       noOfGroups += 1;
4602     else
4603       noOfGroups += Pair.second.size();
4604   }
4605
4606   OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4607   if (MinMaxAliasGroups.empty()) {
4608     OS.indent(8) << "n/a\n";
4609     return;
4610   }
4611
4612   for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4613
4614     // If the group has no read only accesses print the write accesses.
4615     if (Pair.second.empty()) {
4616       OS.indent(8) << "[[";
4617       for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4618         OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4619            << ">";
4620       }
4621       OS << " ]]\n";
4622     }
4623
4624     for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4625       OS.indent(8) << "[[";
4626       OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4627       for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4628         OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4629            << ">";
4630       }
4631       OS << " ]]\n";
4632     }
4633   }
4634 }
4635
4636 void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
4637   OS << "Statements {\n";
4638
4639   for (const ScopStmt &Stmt : *this) {
4640     OS.indent(4);
4641     Stmt.print(OS, PrintInstructions);
4642   }
4643
4644   OS.indent(4) << "}\n";
4645 }
4646
4647 void Scop::printArrayInfo(raw_ostream &OS) const {
4648   OS << "Arrays {\n";
4649
4650   for (auto &Array : arrays())
4651     Array->print(OS);
4652
4653   OS.indent(4) << "}\n";
4654
4655   OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4656
4657   for (auto &Array : arrays())
4658     Array->print(OS, /* SizeAsPwAff */ true);
4659
4660   OS.indent(4) << "}\n";
4661 }
4662
4663 void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
4664   OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4665   OS.indent(4) << "Region: " << getNameStr() << "\n";
4666   OS.indent(4) << "Max Loop Depth:  " << getMaxLoopDepth() << "\n";
4667   OS.indent(4) << "Invariant Accesses: {\n";
4668   for (const auto &IAClass : InvariantEquivClasses) {
4669     const auto &MAs = IAClass.InvariantAccesses;
4670     if (MAs.empty()) {
4671       OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4672     } else {
4673       MAs.front()->print(OS);
4674       OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4675                     << "\n";
4676     }
4677   }
4678   OS.indent(4) << "}\n";
4679   printContext(OS.indent(4));
4680   printArrayInfo(OS.indent(4));
4681   printAliasAssumptions(OS);
4682   printStatements(OS.indent(4), PrintInstructions);
4683 }
4684
4685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4686 LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
4687 #endif
4688
4689 isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
4690
4691 __isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
4692                                  bool NonNegative) {
4693   // First try to use the SCEVAffinator to generate a piecewise defined
4694   // affine function from @p E in the context of @p BB. If that tasks becomes to
4695   // complex the affinator might return a nullptr. In such a case we invalidate
4696   // the SCoP and return a dummy value. This way we do not need to add error
4697   // handling code to all users of this function.
4698   auto PWAC = Affinator.getPwAff(E, BB);
4699   if (PWAC.first) {
4700     // TODO: We could use a heuristic and either use:
4701     //         SCEVAffinator::takeNonNegativeAssumption
4702     //       or
4703     //         SCEVAffinator::interpretAsUnsigned
4704     //       to deal with unsigned or "NonNegative" SCEVs.
4705     if (NonNegative)
4706       Affinator.takeNonNegativeAssumption(PWAC);
4707     return PWAC;
4708   }
4709
4710   auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
4711   invalidate(COMPLEXITY, DL, BB);
4712   return Affinator.getPwAff(SE->getZero(E->getType()), BB);
4713 }
4714
4715 isl::union_set Scop::getDomains() const {
4716   isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
4717   isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4718
4719   for (const ScopStmt &Stmt : *this)
4720     Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());
4721
4722   return isl::manage(Domain);
4723 }
4724
4725 isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4726   PWACtx PWAC = getPwAff(E, BB);
4727   isl_set_free(PWAC.second);
4728   return isl::manage(PWAC.first);
4729 }
4730
4731 isl::union_map
4732 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4733   isl::union_map Accesses = isl::union_map::empty(getParamSpace());
4734
4735   for (ScopStmt &Stmt : *this) {
4736     for (MemoryAccess *MA : Stmt) {
4737       if (!Predicate(*MA))
4738         continue;
4739
4740       isl::set Domain = Stmt.getDomain();
4741       isl::map AccessDomain = MA->getAccessRelation();
4742       AccessDomain = AccessDomain.intersect_domain(Domain);
4743       Accesses = Accesses.add_map(AccessDomain);
4744     }
4745   }
4746
4747   return Accesses.coalesce();
4748 }
4749
4750 isl::union_map Scop::getMustWrites() {
4751   return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4752 }
4753
4754 isl::union_map Scop::getMayWrites() {
4755   return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4756 }
4757
4758 isl::union_map Scop::getWrites() {
4759   return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4760 }
4761
4762 isl::union_map Scop::getReads() {
4763   return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4764 }
4765
4766 isl::union_map Scop::getAccesses() {
4767   return getAccessesOfType([](MemoryAccess &MA) { return true; });
4768 }
4769
4770 isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
4771   return getAccessesOfType(
4772       [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
4773 }
4774
4775 // Check whether @p Node is an extension node.
4776 //
4777 // @return true if @p Node is an extension node.
4778 isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
4779   if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
4780     return isl_bool_error;
4781   else
4782     return isl_bool_true;
4783 }
4784
4785 bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
4786   return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
4787                                                      nullptr) == isl_stat_error;
4788 }
4789
4790 isl::union_map Scop::getSchedule() const {
4791   auto *Tree = getScheduleTree().release();
4792   if (containsExtensionNode(Tree)) {
4793     isl_schedule_free(Tree);
4794     return nullptr;
4795   }
4796   auto *S = isl_schedule_get_map(Tree);
4797   isl_schedule_free(Tree);
4798   return isl::manage(S);
4799 }
4800
4801 isl::schedule Scop::getScheduleTree() const {
4802   return isl::manage(isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
4803                                                    getDomains().release()));
4804 }
4805
4806 void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
4807   auto *S = isl_schedule_from_domain(getDomains().release());
4808   S = isl_schedule_insert_partial_schedule(
4809       S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
4810   isl_schedule_free(Schedule);
4811   Schedule = S;
4812 }
4813
4814 void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
4815   isl_schedule_free(Schedule);
4816   Schedule = NewSchedule;
4817 }
4818
4819 bool Scop::restrictDomains(isl::union_set Domain) {
4820   bool Changed = false;
4821   for (ScopStmt &Stmt : *this) {
4822     isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
4823     isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);
4824
4825     if (StmtDomain.is_subset(NewStmtDomain))
4826       continue;
4827
4828     Changed = true;
4829
4830     NewStmtDomain = NewStmtDomain.coalesce();
4831
4832     if (NewStmtDomain.is_empty())
4833       Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
4834     else
4835       Stmt.restrictDomain(isl::set(NewStmtDomain));
4836   }
4837   return Changed;
4838 }
4839
4840 ScalarEvolution *Scop::getSE() const { return SE; }
4841
4842 // Create an isl_multi_union_aff that defines an identity mapping from the
4843 // elements of USet to their N-th dimension.
4844 //
4845 // # Example:
4846 //
4847 //            Domain: { A[i,j]; B[i,j,k] }
4848 //                 N: 1
4849 //
4850 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4851 //
4852 // @param USet   A union set describing the elements for which to generate a
4853 //               mapping.
4854 // @param N      The dimension to map to.
4855 // @returns      A mapping from USet to its N-th dimension.
4856 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
4857   assert(N >= 0);
4858   assert(USet);
4859   assert(!USet.is_empty());
4860
4861   auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
4862
4863   auto Lambda = [&Result, N](isl::set S) -> isl::stat {
4864     int Dim = S.dim(isl::dim::set);
4865     auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
4866                                                   N, Dim - N);
4867     if (N > 1)
4868       PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
4869
4870     Result = Result.add_pw_multi_aff(PMA);
4871     return isl::stat::ok;
4872   };
4873
4874   isl::stat Res = USet.foreach_set(Lambda);
4875   (void)Res;
4876
4877   assert(Res == isl::stat::ok);
4878
4879   return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
4880 }
4881
4882 void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop,
4883                        std::vector<Instruction *> Instructions) {
4884   assert(BB && "Unexpected nullptr!");
4885   Stmts.emplace_back(*this, *BB, SurroundingLoop, Instructions);
4886   auto *Stmt = &Stmts.back();
4887   StmtMap[BB].push_back(Stmt);
4888   for (Instruction *Inst : Instructions) {
4889     assert(!InstStmtMap.count(Inst) &&
4890            "Unexpected statement corresponding to the instruction.");
4891     InstStmtMap[Inst] = Stmt;
4892   }
4893 }
4894
4895 void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
4896   assert(R && "Unexpected nullptr!");
4897   Stmts.emplace_back(*this, *R, SurroundingLoop);
4898   auto *Stmt = &Stmts.back();
4899   for (BasicBlock *BB : R->blocks()) {
4900     StmtMap[BB].push_back(Stmt);
4901     for (Instruction &Inst : *BB) {
4902       assert(!InstStmtMap.count(&Inst) &&
4903              "Unexpected statement corresponding to the instruction.");
4904       InstStmtMap[&Inst] = Stmt;
4905     }
4906   }
4907 }
4908
4909 ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
4910                             isl::set Domain) {
4911 #ifndef NDEBUG
4912   isl::set SourceDomain = SourceRel.domain();
4913   isl::set TargetDomain = TargetRel.domain();
4914   assert(Domain.is_subset(TargetDomain) &&
4915          "Target access not defined for complete statement domain");
4916   assert(Domain.is_subset(SourceDomain) &&
4917          "Source access not defined for complete statement domain");
4918 #endif
4919   Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4920   CopyStmtsNum++;
4921   return &(Stmts.back());
4922 }
4923
4924 void Scop::buildSchedule(LoopInfo &LI) {
4925   Loop *L = getLoopSurroundingScop(*this, LI);
4926   LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
4927   buildSchedule(getRegion().getNode(), LoopStack, LI);
4928   assert(LoopStack.size() == 1 && LoopStack.back().L == L);
4929   Schedule = LoopStack[0].Schedule;
4930 }
4931
4932 /// To generate a schedule for the elements in a Region we traverse the Region
4933 /// in reverse-post-order and add the contained RegionNodes in traversal order
4934 /// to the schedule of the loop that is currently at the top of the LoopStack.
4935 /// For loop-free codes, this results in a correct sequential ordering.
4936 ///
4937 /// Example:
4938 ///           bb1(0)
4939 ///         /     \.
4940 ///      bb2(1)   bb3(2)
4941 ///         \    /  \.
4942 ///          bb4(3)  bb5(4)
4943 ///             \   /
4944 ///              bb6(5)
4945 ///
4946 /// Including loops requires additional processing. Whenever a loop header is
4947 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4948 /// from an empty schedule, we first process all RegionNodes that are within
4949 /// this loop and complete the sequential schedule at this loop-level before
4950 /// processing about any other nodes. To implement this
4951 /// loop-nodes-first-processing, the reverse post-order traversal is
4952 /// insufficient. Hence, we additionally check if the traversal yields
4953 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4954 /// These region-nodes are then queue and only traverse after the all nodes
4955 /// within the current loop have been processed.
4956 void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
4957   Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);
4958
4959   ReversePostOrderTraversal<Region *> RTraversal(R);
4960   std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
4961   std::deque<RegionNode *> DelayList;
4962   bool LastRNWaiting = false;
4963
4964   // Iterate over the region @p R in reverse post-order but queue
4965   // sub-regions/blocks iff they are not part of the last encountered but not
4966   // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4967   // that we queued the last sub-region/block from the reverse post-order
4968   // iterator. If it is set we have to explore the next sub-region/block from
4969   // the iterator (if any) to guarantee progress. If it is not set we first try
4970   // the next queued sub-region/blocks.
4971   while (!WorkList.empty() || !DelayList.empty()) {
4972     RegionNode *RN;
4973
4974     if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
4975       RN = WorkList.front();
4976       WorkList.pop_front();
4977       LastRNWaiting = false;
4978     } else {
4979       RN = DelayList.front();
4980       DelayList.pop_front();
4981     }
4982
4983     Loop *L = getRegionNodeLoop(RN, LI);
4984     if (!contains(L))
4985       L = OuterScopLoop;
4986
4987     Loop *LastLoop = LoopStack.back().L;
4988     if (LastLoop != L) {
4989       if (LastLoop && !LastLoop->contains(L)) {
4990         LastRNWaiting = true;
4991         DelayList.push_back(RN);
4992         continue;
4993       }
4994       LoopStack.push_back({L, nullptr, 0});
4995     }
4996     buildSchedule(RN, LoopStack, LI);
4997   }
4998 }
4999
5000 void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
5001   if (RN->isSubRegion()) {
5002     auto *LocalRegion = RN->getNodeAs<Region>();
5003     if (!isNonAffineSubRegion(LocalRegion)) {
5004       buildSchedule(LocalRegion, LoopStack, LI);
5005       return;
5006     }
5007   }
5008
5009   auto &LoopData = LoopStack.back();
5010   LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);
5011
5012   for (auto *Stmt : getStmtListFor(RN)) {
5013     auto *UDomain = isl_union_set_from_set(Stmt->getDomain().release());
5014     auto *StmtSchedule = isl_schedule_from_domain(UDomain);
5015     LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
5016   }
5017
5018   // Check if we just processed the last node in this loop. If we did, finalize
5019   // the loop by:
5020   //
5021   //   - adding new schedule dimensions
5022   //   - folding the resulting schedule into the parent loop schedule
5023   //   - dropping the loop schedule from the LoopStack.
5024   //
5025   // Then continue to check surrounding loops, which might also have been
5026   // completed by this node.
5027   while (LoopData.L &&
5028          LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
5029     auto *Schedule = LoopData.Schedule;
5030     auto NumBlocksProcessed = LoopData.NumBlocksProcessed;
5031
5032     LoopStack.pop_back();
5033     auto &NextLoopData = LoopStack.back();
5034
5035     if (Schedule) {
5036       isl::union_set Domain = give(isl_schedule_get_domain(Schedule));
5037       isl::multi_union_pw_aff MUPA = mapToDimension(Domain, LoopStack.size());
5038       Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA.release());
5039       NextLoopData.Schedule =
5040           combineInSequence(NextLoopData.Schedule, Schedule);
5041     }
5042
5043     NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
5044     LoopData = NextLoopData;
5045   }
5046 }
5047
5048 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
5049   auto StmtMapIt = StmtMap.find(BB);
5050   if (StmtMapIt == StmtMap.end())
5051     return {};
5052   assert(StmtMapIt->second.size() == 1 &&
5053          "Each statement corresponds to exactly one BB.");
5054   return StmtMapIt->second;
5055 }
5056
5057 ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
5058   ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
5059   if (!StmtList.empty())
5060     return StmtList.back();
5061   return nullptr;
5062 }
5063
5064 ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
5065   if (RN->isSubRegion())
5066     return getStmtListFor(RN->getNodeAs<Region>());
5067   return getStmtListFor(RN->getNodeAs<BasicBlock>());
5068 }
5069
5070 ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
5071   return getStmtListFor(R->getEntry());
5072 }
5073
5074 int Scop::getRelativeLoopDepth(const Loop *L) const {
5075   if (!L || !R.contains(L))
5076     return -1;
5077   // outermostLoopInRegion always returns nullptr for top level regions
5078   if (R.isTopLevelRegion()) {
5079     // LoopInfo's depths start at 1, we start at 0
5080     return L->getLoopDepth() - 1;
5081   } else {
5082     Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
5083     assert(OuterLoop);
5084     return L->getLoopDepth() - OuterLoop->getLoopDepth();
5085   }
5086 }
5087
5088 ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
5089   for (auto &SAI : arrays()) {
5090     if (SAI->getName() == BaseName)
5091       return SAI;
5092   }
5093   return nullptr;
5094 }
5095
5096 void Scop::addAccessData(MemoryAccess *Access) {
5097   const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
5098   assert(SAI && "can only use after access relations have been constructed");
5099
5100   if (Access->isOriginalValueKind() && Access->isRead())
5101     ValueUseAccs[SAI].push_back(Access);
5102   else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
5103     PHIIncomingAccs[SAI].push_back(Access);
5104 }
5105
5106 void Scop::removeAccessData(MemoryAccess *Access) {
5107   if (Access->isOriginalValueKind() && Access->isRead()) {
5108     auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
5109     std::remove(Uses.begin(), Uses.end(), Access);
5110   } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
5111     auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
5112     std::remove(Incomings.begin(), Incomings.end(), Access);
5113   }
5114 }
5115
5116 MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
5117   assert(SAI->isValueKind());
5118
5119   Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
5120   if (!Val)
5121     return nullptr;
5122
5123   ScopStmt *Stmt = getStmtFor(Val);
5124   if (!Stmt)
5125     return nullptr;
5126
5127   return Stmt->lookupValueWriteOf(Val);
5128 }
5129
5130 ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
5131   assert(SAI->isValueKind());
5132   auto It = ValueUseAccs.find(SAI);
5133   if (It == ValueUseAccs.end())
5134     return {};
5135   return It->second;
5136 }
5137
5138 MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
5139   assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5140
5141   if (SAI->isExitPHIKind())
5142     return nullptr;
5143
5144   PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
5145   ScopStmt *Stmt = getStmtFor(PHI);
5146   assert(Stmt && "PHINode must be within the SCoP");
5147
5148   return Stmt->lookupPHIReadOf(PHI);
5149 }
5150
5151 ArrayRef<MemoryAccess *> Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
5152   assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5153   auto It = PHIIncomingAccs.find(SAI);
5154   if (It == PHIIncomingAccs.end())
5155     return {};
5156   return It->second;
5157 }
5158
5159 bool Scop::isEscaping(Instruction *Inst) {
5160   assert(contains(Inst) && "The concept of escaping makes only sense for "
5161                            "values defined inside the SCoP");
5162
5163   for (Use &Use : Inst->uses()) {
5164     BasicBlock *UserBB = getUseBlock(Use);
5165     if (!contains(UserBB))
5166       return true;
5167
5168     // When the SCoP region exit needs to be simplified, PHIs in the region exit
5169     // move to a new basic block such that its incoming blocks are not in the
5170     // SCoP anymore.
5171     if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
5172         isExit(cast<PHINode>(Use.getUser())->getParent()))
5173       return true;
5174   }
5175   return false;
5176 }
5177
5178 Scop::ScopStatistics Scop::getStatistics() const {
5179   ScopStatistics Result;
5180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5181   auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);
5182
5183   int NumTotalLoops = LoopStat.NumLoops;
5184   Result.NumBoxedLoops = getBoxedLoops().size();
5185   Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;
5186
5187   for (const ScopStmt &Stmt : *this) {
5188     isl::set Domain = Stmt.getDomain().intersect_params(getContext());
5189     bool IsInLoop = Stmt.getNumIterators() >= 1;
5190     for (MemoryAccess *MA : Stmt) {
5191       if (!MA->isWrite())
5192         continue;
5193
5194       if (MA->isLatestValueKind()) {
5195         Result.NumValueWrites += 1;
5196         if (IsInLoop)
5197           Result.NumValueWritesInLoops += 1;
5198       }
5199
5200       if (MA->isLatestAnyPHIKind()) {
5201         Result.NumPHIWrites += 1;
5202         if (IsInLoop)
5203           Result.NumPHIWritesInLoops += 1;
5204       }
5205
5206       isl::set AccSet =
5207           MA->getAccessRelation().intersect_domain(Domain).range();
5208       if (AccSet.is_singleton()) {
5209         Result.NumSingletonWrites += 1;
5210         if (IsInLoop)
5211           Result.NumSingletonWritesInLoops += 1;
5212       }
5213     }
5214   }
5215 #endif
5216   return Result;
5217 }
5218
5219 raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
5220   scop.print(OS, PollyPrintInstructions);
5221   return OS;
5222 }
5223
5224 //===----------------------------------------------------------------------===//
5225 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
5226   AU.addRequired<LoopInfoWrapperPass>();
5227   AU.addRequired<RegionInfoPass>();
5228   AU.addRequired<DominatorTreeWrapperPass>();
5229   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5230   AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5231   AU.addRequired<AAResultsWrapperPass>();
5232   AU.addRequired<AssumptionCacheTracker>();
5233   AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5234   AU.setPreservesAll();
5235 }
5236
5237 void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
5238                               Scop::ScopStatistics ScopStats) {
5239   assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
5240
5241   NumScops++;
5242   NumLoopsInScop += Stats.NumLoops;
5243   MaxNumLoopsInScop =
5244       std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
5245
5246   if (Stats.MaxDepth == 1)
5247     NumScopsDepthOne++;
5248   else if (Stats.MaxDepth == 2)
5249     NumScopsDepthTwo++;
5250   else if (Stats.MaxDepth == 3)
5251     NumScopsDepthThree++;
5252   else if (Stats.MaxDepth == 4)
5253     NumScopsDepthFour++;
5254   else if (Stats.MaxDepth == 5)
5255     NumScopsDepthFive++;
5256   else
5257     NumScopsDepthLarger++;
5258
5259   NumAffineLoops += ScopStats.NumAffineLoops;
5260   NumBoxedLoops += ScopStats.NumBoxedLoops;
5261
5262   NumValueWrites += ScopStats.NumValueWrites;
5263   NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
5264   NumPHIWrites += ScopStats.NumPHIWrites;
5265   NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
5266   NumSingletonWrites += ScopStats.NumSingletonWrites;
5267   NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
5268 }
5269
5270 bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
5271   auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
5272
5273   if (!SD.isMaxRegionInScop(*R))
5274     return false;
5275
5276   Function *F = R->getEntry()->getParent();
5277   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5278   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5279   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
5280   auto const &DL = F->getParent()->getDataLayout();
5281   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5282   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
5283   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5284
5285   ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
5286   S = SB.getScop(); // take ownership of scop object
5287
5288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5289   if (S) {
5290     ScopDetection::LoopStats Stats =
5291         ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
5292     updateLoopCountStatistic(Stats, S->getStatistics());
5293   }
5294 #endif
5295
5296   return false;
5297 }
5298
5299 void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
5300   if (S)
5301     S->print(OS, PollyPrintInstructions);
5302   else
5303     OS << "Invalid Scop!\n";
5304 }
5305
5306 char ScopInfoRegionPass::ID = 0;
5307
5308 Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5309
5310 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
5311                       "Polly - Create polyhedral description of Scops", false,
5312                       false);
5313 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
5314 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
5315 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
5316 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
5317 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
5318 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
5319 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
5320 INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
5321                     "Polly - Create polyhedral description of Scops", false,
5322                     false)
5323
5324 //===----------------------------------------------------------------------===//
5325 ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
5326                    LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
5327                    AssumptionCache &AC, OptimizationRemarkEmitter &ORE)
5328     : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
5329   recompute();
5330 }
5331
5332 void ScopInfo::recompute() {
5333   RegionToScopMap.clear();
5334   /// Create polyhedral description of scops for all the valid regions of a
5335   /// function.
5336   for (auto &It : SD) {
5337     Region *R = const_cast<Region *>(It);
5338     if (!SD.isMaxRegionInScop(*R))
5339       continue;
5340
5341     ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
5342     std::unique_ptr<Scop> S = SB.getScop();
5343     if (!S)
5344       continue;
5345 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5346     ScopDetection::LoopStats Stats =
5347         ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
5348     updateLoopCountStatistic(Stats, S->getStatistics());
5349 #endif
5350     bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
5351     assert(Inserted && "Building Scop for the same region twice!");
5352     (void)Inserted;
5353   }
5354 }
5355
5356 bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
5357                           FunctionAnalysisManager::Invalidator &Inv) {
5358   // Check whether the analysis, all analyses on functions have been preserved
5359   // or anything we're holding references to is being invalidated
5360   auto PAC = PA.getChecker<ScopInfoAnalysis>();
5361   return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
5362          Inv.invalidate<ScopAnalysis>(F, PA) ||
5363          Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
5364          Inv.invalidate<LoopAnalysis>(F, PA) ||
5365          Inv.invalidate<AAManager>(F, PA) ||
5366          Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
5367          Inv.invalidate<AssumptionAnalysis>(F, PA);
5368 }
5369
5370 AnalysisKey ScopInfoAnalysis::Key;
5371
5372 ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
5373                                                FunctionAnalysisManager &FAM) {
5374   auto &SD = FAM.getResult<ScopAnalysis>(F);
5375   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
5376   auto &LI = FAM.getResult<LoopAnalysis>(F);
5377   auto &AA = FAM.getResult<AAManager>(F);
5378   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
5379   auto &AC = FAM.getResult<AssumptionAnalysis>(F);
5380   auto &DL = F.getParent()->getDataLayout();
5381   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5382   return {DL, SD, SE, LI, AA, DT, AC, ORE};
5383 }
5384
5385 PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
5386                                            FunctionAnalysisManager &FAM) {
5387   auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
5388   // Since the legacy PM processes Scops in bottom up, we print them in reverse
5389   // order here to keep the output persistent
5390   for (auto &It : reverse(SI)) {
5391     if (It.second)
5392       It.second->print(Stream, PollyPrintInstructions);
5393     else
5394       Stream << "Invalid Scop!\n";
5395   }
5396   return PreservedAnalyses::all();
5397 }
5398
5399 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
5400   AU.addRequired<LoopInfoWrapperPass>();
5401   AU.addRequired<RegionInfoPass>();
5402   AU.addRequired<DominatorTreeWrapperPass>();
5403   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5404   AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5405   AU.addRequired<AAResultsWrapperPass>();
5406   AU.addRequired<AssumptionCacheTracker>();
5407   AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5408   AU.setPreservesAll();
5409 }
5410
5411 bool ScopInfoWrapperPass::runOnFunction(Function &F) {
5412   auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
5413   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5414   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5415   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
5416   auto const &DL = F.getParent()->getDataLayout();
5417   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5418   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5419   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5420
5421   Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
5422   return false;
5423 }
5424
5425 void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
5426   for (auto &It : *Result) {
5427     if (It.second)
5428       It.second->print(OS, PollyPrintInstructions);
5429     else
5430       OS << "Invalid Scop!\n";
5431   }
5432 }
5433
5434 char ScopInfoWrapperPass::ID = 0;
5435
5436 Pass *polly::createScopInfoWrapperPassPass() {
5437   return new ScopInfoWrapperPass();
5438 }
5439
5440 INITIALIZE_PASS_BEGIN(
5441     ScopInfoWrapperPass, "polly-function-scops",
5442     "Polly - Create polyhedral description of all Scops of a function", false,
5443     false);
5444 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
5445 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
5446 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
5447 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
5448 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
5449 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
5450 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
5451 INITIALIZE_PASS_END(
5452     ScopInfoWrapperPass, "polly-function-scops",
5453     "Polly - Create polyhedral description of all Scops of a function", false,
5454     false)