// Snapshot provenance: [ScopBuilder] Report to dbgs() on SCoP bailout. NFC.
// polly-mirror.git / lib/Analysis/ScopInfo.cpp
// blob 813a569499f539c1f13b1d5c8d33f88d34d844f3
//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

// Statistics (printed with -stats): how often each kind of assumption had to
// be taken while building SCoPs.
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant loads assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

// Statistics about the SCoPs that remain feasible at the end of ScopInfo.
STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");

STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");

STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
STATISTIC(
    NumValueWritesInLoops,
    "Number of scalar value writes nested in affine loops after ScopInfo");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
STATISTIC(NumPHIWritesInLoops,
          "Number of scalar phi writes nested in affine loops after ScopInfo");
STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
STATISTIC(NumSingletonWritesInLoops,
          "Number of singleton writes nested in affine loops after ScopInfo");

// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;

// Command-line knobs controlling the precision/compile-time trade-offs of the
// SCoP construction below. All live in the common Polly option category.
static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kind of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjunts allowed in memory accesses to "
             "to build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (do not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do no gist simplify sets accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferencible."
        " This is useful for invariant load hoisting, since we can generate"
        " less runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads. "),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

// Storage behind -polly-use-llvm-names; read elsewhere via
// polly::UseInstructionNames.
bool polly::UseInstructionNames;

static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyPrintInstructions(
    "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
    cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//
262 // Create a sequence of two schedules. Either argument may be null and is
263 // interpreted as the empty schedule. Can also return null if both schedules are
264 // empty.
265 static __isl_give isl_schedule *
266 combineInSequence(__isl_take isl_schedule *Prev,
267 __isl_take isl_schedule *Succ) {
268 if (!Prev)
269 return Succ;
270 if (!Succ)
271 return Prev;
273 return isl_schedule_sequence(Prev, Succ);
// Constrain dimension @p dim of @p S to the values covered by @p Range.
//
// Lower/upper bounds always come from the signed extrema of the range; for
// sign-wrapped ranges the set is additionally refined to exclude the values
// the wrapping range does not cover.
static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                    int dim, isl::dim type) {
  isl::val V;
  isl::ctx Ctx = S.get_ctx();

  // The upper and lower bound for a parameter value is derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
  S = S.lower_bound_val(type, dim, V);
  V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
  S = S.upper_bound_val(type, dim, V);

  if (Range.isFullSet())
    return S;

  // Do not refine further once the set already has many disjuncts; the
  // union below would only grow it more.
  if (isl_set_n_basic_set(S.get()) > MaxDisjunctsInContext)
    return S;

  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
  if (Range.isSignWrappedSet()) {
    V = valFromAPInt(Ctx.get(), Range.getLower(), true);
    isl::set SLB = S.lower_bound_val(type, dim, V);

    // Range.getUpper() is exclusive, hence the subtraction of one.
    V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
    V = V.sub_ui(1);
    isl::set SUB = S.upper_bound_val(type, dim, V);
    S = SLB.unite(SUB);
  }

  return S;
}
// If the base pointer of an array is itself loaded inside the SCoP, return
// the ScopArrayInfo of the array that load reads from; otherwise nullptr.
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  // Only loads executed inside the SCoP establish an origin relation.
  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  // Only plain (SCEVUnknown) base pointers can be mapped back to an array.
  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             MemoryKind::Array);
}
// Construct a ScopArrayInfo: derive an isl-compatible name (unless an explicit
// BaseName was given), record the dimension sizes, and — for real arrays —
// link to the SAI whose memory the base pointer was loaded from, if any.
ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx,
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
  std::string BasePtrName =
      BaseName ? BaseName
               : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
                                      Kind == MemoryKind::PHI ? "__phi" : "",
                                      UseInstructionNames);
  Id = isl::id::alloc(Ctx, BasePtrName, this);

  updateSizes(Sizes);

  // Base-pointer origins only make sense for genuine arrays with a base
  // pointer; scalar/PHI kinds carry none.
  if (!BasePtr || Kind != MemoryKind::Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}
// Out-of-line to anchor the vtable-free class; no resources to release.
ScopArrayInfo::~ScopArrayInfo() = default;
359 isl::space ScopArrayInfo::getSpace() const {
360 auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions());
361 Space = Space.set_tuple_id(isl::dim::set, Id);
362 return Space;
365 bool ScopArrayInfo::isReadOnly() {
366 isl::union_set WriteSet = S.getWrites().range();
367 isl::space Space = getSpace();
368 WriteSet = WriteSet.extract_set(Space);
370 return bool(WriteSet.is_empty());
373 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
374 if (Array->getElementType() != getElementType())
375 return false;
377 if (Array->getNumberOfDimensions() != getNumberOfDimensions())
378 return false;
380 for (unsigned i = 0; i < getNumberOfDimensions(); i++)
381 if (Array->getDimensionSize(i) != getDimensionSize(i))
382 return false;
384 return true;
// Reconcile the canonical element type of this array with NewElementType,
// falling back to an integer type of the GCD bit-width when the sizes differ.
void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  // NOTE(review): for the sizes that reach here (non-zero, unequal),
  // `NewElementSize % OldElementSize == 0` implies New >= Old, so combined
  // with `NewElementSize < OldElementSize` this condition looks
  // unsatisfiable and the GCD fallback always runs. The GCD still yields a
  // valid common element size, but confirm whether the intended test was
  // `OldElementSize % NewElementSize == 0`.
  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}
/// Make the ScopArrayInfo model a Fortran Array
///
/// The outermost dimension, whose size is unknown from SCEV, is replaced by a
/// fresh isl parameter that stands for the size stored in the Fortran array
/// descriptor @p FAD.
void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
  assert(FAD && "got invalid Fortran array descriptor");
  if (this->FAD) {
    assert(this->FAD == FAD &&
           "receiving different array descriptors for same array");
    return;
  }

  // The outermost dimension must currently be size-less.
  assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
  assert(!this->FAD);
  this->FAD = FAD;

  isl::space Space(S.getIslCtx(), 1, 0);

  std::string param_name = getName();
  param_name += "_fortranarr_size";
  isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this);

  Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
  // The dimension size is simply the value of the new parameter.
  isl::pw_aff PwAff =
      isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);

  DimensionSizesPw[0] = PwAff;
}
// Update the dimension sizes of this array with @p NewSizes.
//
// Sizes are aligned at the innermost dimension (the two size lists may have
// different lengths). With @p CheckConsistency, returns false if a known size
// contradicts a new one, and keeps the old sizes when they already cover at
// least as many dimensions. Otherwise the new sizes replace the old ones and
// the piecewise-affine forms are recomputed.
bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
                                bool CheckConsistency) {
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
  int ExtraDimsNew = NewSizes.size() - SharedDims;
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;

  if (CheckConsistency) {
    // Compare the sizes of the dimensions both lists describe, innermost
    // aligned; a mismatch of two known sizes is a hard failure.
    for (int i = 0; i < SharedDims; i++) {
      auto *NewSize = NewSizes[i + ExtraDimsNew];
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
      if (NewSize && KnownSize && NewSize != KnownSize)
        return false;
    }

    // Nothing to do if we already model at least as many dimensions.
    if (DimensionSizes.size() >= NewSizes.size())
      return true;
  }

  DimensionSizes.clear();
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
                        NewSizes.end());
  DimensionSizesPw.clear();
  for (const SCEV *Expr : DimensionSizes) {
    if (!Expr) {
      // Unknown size (e.g. outermost dimension).
      DimensionSizesPw.push_back(nullptr);
      continue;
    }
    isl::pw_aff Size = S.getPwAffOnly(Expr);
    DimensionSizesPw.push_back(Size);
  }
  return true;
}
// The array's name is the name stored in its isl id.
std::string ScopArrayInfo::getName() const { return Id.get_name(); }
// Allocation size of one canonical element, in bytes.
int ScopArrayInfo::getElemSizeInBytes() const {
  return DL.getTypeAllocSize(ElementType);
}
// The isl id that identifies this array (also carries `this` as user data).
isl::id ScopArrayInfo::getBasePtrId() const { return Id; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print this array to stderr.
LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); }
#endif
// Print this array in the form "<type> <name>[size]...[size]; // Element size N".
// With @p SizeAsPwAff the sizes are printed as isl piecewise-affine
// expressions instead of SCEVs.
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // If this is a Fortran array, then we can print the outermost dimension
  // as a isl_pw_aff even though there is no SCEV information.
  bool IsOutermostSizeKnown = SizeAsPwAff && FAD;

  if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
      !getDimensionSize(0)) {
    // Unknown outermost size is rendered as "[*]".
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      isl::pw_aff Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}
// Recover the ScopArrayInfo from the output-tuple id of an access function.
const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
  isl::id Id = PMA.get_tuple_id(isl::dim::out);
  assert(!Id.is_null() && "Output dimension didn't have an ID");
  return getFromId(Id);
}
516 const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
517 void *User = Id.get_user();
518 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
519 return SAI;
// Fold each index expression into the bounds of its dimension: replace index i
// by (i mod size) and carry floor(i / size) into the next-outer dimension, for
// every inner dimension whose size is a non-zero compile-time constant.
void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::ctx Ctx = ArraySpace.get_ctx();
  unsigned DimsArray = SAI->getNumberOfDimensions();

  // Identity transformation on the array space; refined dimension by
  // dimension below.
  isl::multi_aff DivModAff = isl::multi_aff::identity(
      ArraySpace.map_from_domain_and_range(ArraySpace));
  isl::local_space LArraySpace = isl::local_space(ArraySpace);

  // Begin with last dimension, to iteratively carry into higher dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    // This transformation is not applicable to dimensions of size zero.
    if (DimSize->isZero())
      continue;

    isl::val DimSizeVal =
        valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
    isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
    isl::aff PrevVar =
        isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    isl::aff Modulo = Var.mod(DimSizeVal);
    Modulo = Modulo.pullback(DivModAff);

    // Compute: floor(index / size)
    isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
    Divide = Divide.floor();
    Divide = Divide.add(PrevVar);
    Divide = Divide.pullback(DivModAff);

    // Apply Modulo and Divide.
    DivModAff = DivModAff.set_aff(i, Modulo);
    DivModAff = DivModAff.set_aff(i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  isl::map Relation = AccessRelation;
  Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
  Relation = Relation.detect_equalities();
  AccessRelation = Relation;
}
// Align the dimensionality of this access with its array: pad missing outer
// dimensions with zero, normalize byte offsets to element indices, fold
// constant-size dimensions, and widen the accessed range when the loaded type
// is larger than the array's canonical element type.
void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::space AccessSpace = AccessRelation.get_space().range();
  isl::ctx Ctx = ArraySpace.get_ctx();

  auto DimsArray = ArraySpace.dim(isl::dim::set);
  auto DimsAccess = AccessSpace.dim(isl::dim::set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  // Map the (possibly lower-dimensional) access space into the array space:
  // missing outer dimensions become 0, the rest are equated.
  isl::map Map = isl::map::from_domain_and_range(
      isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = Map.fix_si(isl::dim::out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);

  AccessRelation = AccessRelation.apply_range(Map);

  // For the non delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not divisible
  // by the accesses element size, we will have chosen a smaller ArrayElemSize
  // that divides the offsets of all accesses to this base pointer.
  if (DimsAccess == 1) {
    isl::val V = isl::val(Ctx, ArrayElemSize);
    AccessRelation = AccessRelation.floordiv_val(V);
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimension's indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; Effect on unit tests is to make index
  // expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    isl::map Map = isl::map::from_domain_and_range(
        isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

    isl::constraint C;
    isl::local_space LS;

    LS = isl::local_space(Map.get_space());
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    // Constrain the innermost output dimension o to  in <= o <= in + Num - 1.
    C = isl::constraint::alloc_inequality(LS);
    C = C.set_constant_val(isl::val(Ctx, Num - 1));
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
    Map = Map.add_constraint(C);

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
    C = C.set_constant_val(isl::val(Ctx, 0));
    Map = Map.add_constraint(C);
    AccessRelation = AccessRelation.apply_range(Map);
  }
}
// Return the printable operator symbol for a reduction type; RT_NONE is a
// programming error.
const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
}
/// Return the reduction type for a given binary operator.
///
/// Floating-point add/mul only qualify when the operation has unsafe algebra
/// (reassociation allowed); multiplicative reductions can be disabled via
/// -polly-disable-multiplicative-reductions.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
711 const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
712 isl::id ArrayId = getArrayId();
713 void *User = ArrayId.get_user();
714 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
715 return SAI;
718 const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
719 isl::id ArrayId = getLatestArrayId();
720 void *User = ArrayId.get_user();
721 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
722 return SAI;
// Array id of the access relation as originally derived from the IR.
isl::id MemoryAccess::getOriginalArrayId() const {
  return AccessRelation.get_tuple_id(isl::dim::out);
}
729 isl::id MemoryAccess::getLatestArrayId() const {
730 if (!hasNewAccessRelation())
731 return getOriginalArrayId();
732 return NewAccessRelation.get_tuple_id(isl::dim::out);
// Single-valued address function: lexicographic minimum of the access
// relation.
isl::map MemoryAccess::getAddressFunction() const {
  return getAccessRelation().lexmin();
}
739 isl::pw_multi_aff
740 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
741 isl::map Schedule, ScheduledAccRel;
742 isl::union_set UDomain;
744 UDomain = getStatement()->getDomain();
745 USchedule = USchedule.intersect_domain(UDomain);
746 Schedule = isl::map::from_union_map(USchedule);
747 ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
748 return isl::pw_multi_aff::from_map(ScheduledAccRel);
// Access relation as originally derived from the IR.
isl::map MemoryAccess::getOriginalAccessRelation() const {
  return AccessRelation;
}
// String form of the original access relation (for printing/debugging).
std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return stringFromIslObj(AccessRelation.get());
}
// Space of the original access relation.
isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
  return AccessRelation.get_space();
}
// Access relation after any transformation (may be null if none was set).
isl::map MemoryAccess::getNewAccessRelation() const {
  return NewAccessRelation;
}
// String form of the new access relation (for printing/debugging).
std::string MemoryAccess::getNewAccessRelationStr() const {
  return stringFromIslObj(NewAccessRelation.get());
}
// String form of the currently effective access relation.
std::string MemoryAccess::getAccessRelationStr() const {
  return getAccessRelation().to_str();
}
// Build a universe map from the statement's domain to a one-dimensional
// (scalar) range, with parameters aligned to the domain space.
isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
  Space = Space.align_params(Statement->getDomainSpace());

  return isl::basic_map::from_domain_and_range(
      isl::basic_set::universe(Statement->getDomainSpace()),
      isl::basic_set::universe(Space));
}
// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out of bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is larger than zero and strictly smaller than the
// size of the corresponding dimension). The only exception is the outermost
// dimension for which we do not need to assume any upper bound. At this point
// we formalize this assumption to ensure that at code generation time the
// relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out of bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out of bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure such
// statement instances are never executed.
void MemoryAccess::assumeNoOutOfBound() {
  if (PollyIgnoreInbounds)
    return;
  auto *SAI = getScopArrayInfo();
  isl::space Space = getOriginalAccessRelationSpace().range();
  isl::set Outside = isl::set::empty(Space);
  // Dimension 0 is intentionally skipped: the outermost dimension needs no
  // upper bound.
  for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
    isl::local_space LS(Space);
    isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
    isl::pw_aff Zero = isl::pw_aff(LS);

    // Out of bounds below (index < 0) ...
    isl::set DimOutside = Var.lt_set(Zero);
    isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
    SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
    SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
    // ... or above (size <= index).
    DimOutside = DimOutside.unite(SizeE.le_set(Var));

    Outside = Outside.unite(DimOutside);
  }

  Outside = Outside.apply(getAccessRelation().reverse());
  Outside = Outside.intersect(Statement->getDomain());
  Outside = Outside.params();

  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always save, but may make the resulting run-time check
  // bail out more often than strictly necessary.
  Outside = Outside.remove_divs();
  Outside = Outside.complement();
  const auto &Loc = getAccessInstruction()
                        ? getAccessInstruction()->getDebugLoc()
                        : DebugLoc();
  if (!PollyPreciseInbounds)
    Outside = Outside.gist_params(Statement->getDomain().params());
  Statement->getParent()->recordAssumption(INBOUNDS, Outside.release(), Loc,
                                           AS_ASSUMPTION);
}
// Build the access relation for a memset/memcpy/memmove access from its start
// offset (Subscripts[0]) and length (Subscripts[1]); an unknown length
// (nullptr) yields an unbounded access starting at the offset.
void MemoryAccess::buildMemIntrinsicAccessRelation() {
  assert(isMemoryIntrinsic());
  assert(Subscripts.size() == 2 && Sizes.size() == 1);

  isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
  isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);

  isl::map LengthMap;
  if (Subscripts[1] == nullptr) {
    // Unknown length: access everything from the start offset onwards.
    LengthMap = isl::map::universe(SubscriptMap.get_space());
  } else {
    isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
    LengthMap = isl::map::from_pw_aff(LengthPWA);
    isl::space RangeSpace = LengthMap.get_space().range();
    // Expand the single length value to all offsets strictly below it.
    LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
  }
  LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
  LengthMap = LengthMap.align_params(SubscriptMap.get_space());
  SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
  // Shift the accessed interval by the start offset.
  LengthMap = LengthMap.sum(SubscriptMap);
  AccessRelation =
      LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
}
868 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
869 ScalarEvolution *SE = Statement->getParent()->getSE();
871 auto MAI = MemAccInst(getAccessInstruction());
872 if (isa<MemIntrinsic>(MAI))
873 return;
875 Value *Ptr = MAI.getPointerOperand();
876 if (!Ptr || !SE->isSCEVable(Ptr->getType()))
877 return;
879 auto *PtrSCEV = SE->getSCEV(Ptr);
880 if (isa<SCEVCouldNotCompute>(PtrSCEV))
881 return;
883 auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
884 if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
885 PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);
887 const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
888 if (Range.isFullSet())
889 return;
891 if (Range.isWrappedSet() || Range.isSignWrappedSet())
892 return;
894 bool isWrapping = Range.isSignWrappedSet();
896 unsigned BW = Range.getBitWidth();
897 const auto One = APInt(BW, 1);
898 const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
899 const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();
901 auto Min = LB.sdiv(APInt(BW, ElementSize));
902 auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
904 assert(Min.sle(Max) && "Minimum expected to be less or equal than max");
906 isl::map Relation = AccessRelation;
907 isl::set AccessRange = Relation.range();
908 AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
909 isl::dim::set);
910 AccessRelation = Relation.intersect_range(AccessRange);
// Fold the access relation so that a subscript that runs below zero borrows
// from the next-outer dimension:
//   (d_i, d_{i+1}) -> (d_i - 1, d_{i+1} + Size_{i+1})  if d_{i+1} < 0
// This is only done when the dimension sizes are non-constant (parametric),
// i.e. presumably for delinearized accesses -- TODO confirm with callers.
void MemoryAccess::foldAccessRelation() {
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    // The parameter carrying the (parametric) size of dimension i+1.
    isl::id ParamId =
        give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    // MapOne: identity for iterations where subscript i+1 is non-negative.
    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    // MapTwo: for negative subscript i+1, keep all other dimensions equal ...
    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    // ... and constrain out_i = in_i - 1 (in_i - out_i - 1 == 0) ...
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    // ... and out_{i+1} = in_{i+1} + Size (in_{i+1} - out_{i+1} + p == 0).
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      isl_map_n_basic_map(NewAccessRelation.get()) >
          isl_map_n_basic_map(AccessRelation.get())) {
    // Keep the original relation; the folded one got more complex.
  } else {
    AccessRelation = NewAccessRelation;
  }
}
982 /// Check if @p Expr is divisible by @p Size.
983 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
984 assert(Size != 0);
985 if (Size == 1)
986 return true;
988 // Only one factor needs to be divisible.
989 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
990 for (auto *FactorExpr : MulExpr->operands())
991 if (isDivisible(FactorExpr, Size, SE))
992 return true;
993 return false;
996 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
997 // to be divisible.
998 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
999 for (auto *OpExpr : NAryExpr->operands())
1000 if (!isDivisible(OpExpr, Size, SE))
1001 return false;
1002 return true;
1005 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
1006 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
1007 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
1008 return MulSCEV == Expr;
// Translate the subscript expressions of this access into an isl map from
// the statement's iteration domain to the array described by @p SAI.
// Memory intrinsics and non-affine accesses are handled specially; the
// latter are overapproximated as accesses to the whole array.
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference, if an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  // Append one output dimension per subscript expression.
  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}
// Construct a MemoryAccess from an explicit access instruction together with
// its subscript and size expressions. The access relation itself is built
// later via buildAccessRelation().
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  // Name the access after its type and the current number of accesses in the
  // statement, prefixed by the statement's base name.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
// Construct a MemoryAccess directly from a new access relation @p AccRel.
// Element type, base pointer and dimension sizes are recovered from the
// ScopArrayInfo attached to the relation's output tuple id.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  // The outermost dimension carries no size expression.
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  // Same naming scheme as the instruction-based constructor.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
// Defaulted out-of-line destructor.
MemoryAccess::~MemoryAccess() = default;
// Simplify the invalid domain and the access relation with respect to the
// parameter constraints of the SCoP's context.
void MemoryAccess::realignParams() {
  isl::set Ctx = Statement->getParent()->getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  AccessRelation = AccessRelation.gist_params(Ctx);
}
// Return the textual form of this access's reduction operator.
const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}
// Return the isl id identifying this memory access.
isl::id MemoryAccess::getId() const { return Id; }
1108 raw_ostream &polly::operator<<(raw_ostream &OS,
1109 MemoryAccess::ReductionType RT) {
1110 if (RT == MemoryAccess::RT_NONE)
1111 OS << "NONE";
1112 else
1113 OS << MemoryAccess::getReductionOperatorStr(RT);
1114 return OS;
// Remember the Fortran array descriptor value backing this access.
void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
// Print a human-readable description of this access: access kind, reduction
// type, optional Fortran descriptor, scalar flag, and the original (and, if
// present, new) access relation.
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print this access to stderr.
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif
// Translate the SCEV @p E into a piecewise affine function relative to this
// access's statement. Iterations for which the translation is not valid are
// accumulated into InvalidDomain.
isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  // Drop the tuple id so the domain can be intersected with the (unnamed)
  // invalid set returned alongside the pw_aff.
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(isl::manage(PWAC.second));
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return isl::manage(PWAC.first);
}
// Create a map in the size of the provided set domain, that maps from the
// one element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strict smaller
// than the last dimension of the output.
//
//   getEqualAndLarger(set[i0, i1, ..., iX]):
//
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strict smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
  return Map;
}
// Compute the set of strides this access exhibits under @p Schedule: relate
// each schedule point to its immediate lexicographic successor, pull both
// sides through the schedule and the access relation, and take the deltas of
// the resulting pairs of accessed elements.
isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  // Only keep the immediately following schedule point.
  NextScatt = NextScatt.lexmin();

  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}
// Check whether every stride of this access under @p Schedule is exactly
// @p StrideWidth in the innermost dimension and zero in all other dimensions.
bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
  isl::set Stride, StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX = isl::set::universe(Stride.get_space());
  // Fix all but the last dimension to zero ...
  for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
    StrideX = StrideX.fix_si(isl::dim::set, i, 0);
  // ... and the last dimension to the requested stride.
  StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
                           StrideWidth);
  IsStrideX = Stride.is_subset(StrideX);

  return IsStrideX;
}
// Stride zero: consecutive schedule points access the same element.
bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}
// Stride one: consecutive schedule points access adjacent elements.
bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}
// Overwrite the original access relation.
void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}
// Install @p NewAccess as the access relation to use from now on. In asserts
// builds several consistency conditions are verified: matching domain space,
// totality for reads, a known accessed array, hoisted base pointers for
// indirect arrays, and matching dimensionality.
void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain =
        StmtDomain.intersect_params(getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to number of dimensions of the
  // accesses array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  // Simplify the relation with respect to the statement's domain before
  // storing it.
  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}
// Return true if the latest access relation does not cover the entire
// statement domain, i.e. the access is only executed on a subdomain.
bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  // Compare against isl_bool_false explicitly: an isl error would yield
  // isl_bool_error, which reports "not partial" here.
  return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
}
1291 //===----------------------------------------------------------------------===//
1293 isl::map ScopStmt::getSchedule() const {
1294 isl::set Domain = getDomain();
1295 if (Domain.is_empty())
1296 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1297 auto Schedule = getParent()->getSchedule();
1298 if (!Schedule)
1299 return nullptr;
1300 Schedule = Schedule.intersect_domain(isl::union_set(Domain));
1301 if (Schedule.is_empty())
1302 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1303 isl::map M = M.from_union_map(Schedule);
1304 M = M.coalesce();
1305 M = M.gist_domain(Domain);
1306 M = M.coalesce();
1307 return M;
// Replace the statement's domain with @p NewDomain, which must be a subset
// of the current domain.
void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}
// Build the polyhedral access relation for every memory access of this
// statement and register each access (and its array) with the SCoP.
void ScopStmt::buildAccessRelations() {
  Scop &S = *getParent();
  for (MemoryAccess *Access : MemAccs) {
    Type *ElementType = Access->getElementType();

    // Select the array kind matching the access kind.
    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                           ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    S.addAccessData(Access);
  }
}
// Add @p Access to this statement and index it in the matching lookup table:
// the per-instruction list for array accesses, or the value/PHI read/write
// maps otherwise. With @p Prepend set, the access goes to the front of the
// access list.
void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    // At most one write per value is expected.
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}
// Simplify all memory accesses, the invalid domain, and the domain of this
// statement with respect to the SCoP's context.
void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  isl::set Ctx = Parent.getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  Domain = Domain.gist_params(Ctx);
}
/// Add @p BSet to the set @p User if @p BSet is bounded.
///
/// Callback for isl_set_foreach_basic_set; @p User points to the isl_set
/// accumulating the bounded basic sets.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}
/// Return the bounded parts of @p S.
///
/// Iterates over the basic sets of @p S and keeps only the bounded ones;
/// takes ownership of @p S.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
  // Force a lower bound of zero on every dimension before checking
  // boundedness.
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  for (unsigned u = 0; u < Dim; u++) {
    // p_u - d_u >= 0, i.e. dimension u is bounded by artificial parameter u.
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}
/// Set the dimension Ids from @p From in @p To.
///
/// Copies each set-dimension id so later set operations on @p To line up
/// with sets derived from @p From.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}
1453 /// Create the conditions under which @p L @p Pred @p R is true.
1454 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1455 __isl_take isl_pw_aff *L,
1456 __isl_take isl_pw_aff *R) {
1457 switch (Pred) {
1458 case ICmpInst::ICMP_EQ:
1459 return isl_pw_aff_eq_set(L, R);
1460 case ICmpInst::ICMP_NE:
1461 return isl_pw_aff_ne_set(L, R);
1462 case ICmpInst::ICMP_SLT:
1463 return isl_pw_aff_lt_set(L, R);
1464 case ICmpInst::ICMP_SLE:
1465 return isl_pw_aff_le_set(L, R);
1466 case ICmpInst::ICMP_SGT:
1467 return isl_pw_aff_gt_set(L, R);
1468 case ICmpInst::ICMP_SGE:
1469 return isl_pw_aff_ge_set(L, R);
1470 case ICmpInst::ICMP_ULT:
1471 return isl_pw_aff_lt_set(L, R);
1472 case ICmpInst::ICMP_UGT:
1473 return isl_pw_aff_gt_set(L, R);
1474 case ICmpInst::ICMP_ULE:
1475 return isl_pw_aff_le_set(L, R);
1476 case ICmpInst::ICMP_UGE:
1477 return isl_pw_aff_ge_set(L, R);
1478 default:
1479 llvm_unreachable("Non integer predicate not supported");
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Helper function that will make sure the dimensions of the result have the
/// same isl_id's as the @p Domain.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R,
                                             __isl_keep isl_set *Domain) {
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
  // Copy the dimension ids so later intersections with Domain line up.
  return setDimensionIds(Domain, ConsequenceCondSet);
}
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  // Record the iterations on which the translation is invalid for this BB.
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(isl::manage(PWAC.second));
  return PWAC.first;
}
/// Build the conditions sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  // Each case contributes the part of the domain on which the switch value
  // equals its case value; successor index 0 is the default destination.
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  // The default destination is taken on the remainder of the domain.
  // NOTE(review): this reads ConditionSets[1], assuming the switch has at
  // least one non-default successor -- verify callers guarantee this.
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = setDimensionIds(
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));

  isl_pw_aff_free(LHS);

  return true;
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
static __isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  // Both conditions together model the unsigned comparison.
  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  ConsequenceCondSet = setDimensionIds(Domain, ConsequenceCondSet);
  return ConsequenceCondSet;
}
/// Build the conditions sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                   TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;
  if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    // Constant conditions are either never or always taken.
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    // Recurse into both operands; each recursion pushes a (consequence,
    // alternative) pair onto ConditionSets.
    bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    // Pop both pairs, keeping only the consequence parts, and combine them
    // according to the opcode.
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    ScalarEvolution &SE = *S.getSE();
    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      // a >u b is modeled as b <u a (operands swapped).
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet =
          buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  // Bail out if either branch's condition set became too disjunctive;
  // large domains make later polyhedral operations too expensive.
  if (TooComplex) {
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                 TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  // Switches are handled by the dedicated overload.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  // An unconditional branch transfers control on the whole domain.
  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
1745 void ScopStmt::buildDomain() {
1746 isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
1748 Domain = getParent()->getDomainConditions(this);
1749 Domain = Domain.set_tuple_id(Id);
1752 void ScopStmt::collectSurroundingLoops() {
1753 for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1754 isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1755 NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
// Constructor for a region statement (a non-affine subregion modeled as a
// single statement).
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), SurroundingLoop(SurroundingLoop) {
  // Name the statement after the region; getNextStmtIdx() keeps names unique.
  BaseName = getIslCompatibleName(
      "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
}
1766 ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
1767 std::vector<Instruction *> Instructions, int Count)
1768 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1769 Build(nullptr), SurroundingLoop(SurroundingLoop),
1770 Instructions(Instructions) {
1771 std::string S = "";
1772 if (Count != 0)
1773 S += std::to_string(Count);
1774 BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), S,
1775 UseInstructionNames);
// Constructor for a copy statement: a statement that is not backed by IR but
// copies data from a source access relation to a target access relation.
ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  // Tie the domain and both access relations to this statement's id.
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  // The write access; ownership of the raw MemoryAccess is handed to the
  // parent Scop via addAccessFunction.
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  // The matching read access from the copy source.
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}

ScopStmt::~ScopStmt() = default;
// One-time initialization: compute the domain, the surrounding loop nest and
// the access relations, then (optionally) detect reduction-like accesses.
void ScopStmt::init(LoopInfo &LI) {
  assert(!Domain && "init must be called only once");

  buildDomain();
  collectSurroundingLoops();
  buildAccessRelations();

  if (DetectReductions)
    checkForReductions();
}
1810 /// Collect loads which might form a reduction chain with @p StoreMA.
1812 /// Check if the stored value for @p StoreMA is a binary operator with one or
1813 /// two loads as operands. If the binary operand is commutative & associative,
1814 /// used only once (by @p StoreMA) and its load operands are also used only
1815 /// once, we have found a possible reduction chain. It starts at an operand
1816 /// load and includes the binary operator and @p StoreMA.
1818 /// Note: We allow only one use to ensure the load and binary operator cannot
1819 /// escape this block or into any other store except @p StoreMA.
1820 void ScopStmt::collectCandiateReductionLoads(
1821 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1822 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1823 if (!Store)
1824 return;
1826 // Skip if there is not one binary operator between the load and the store
1827 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1828 if (!BinOp)
1829 return;
1831 // Skip if the binary operators has multiple uses
1832 if (BinOp->getNumUses() != 1)
1833 return;
1835 // Skip if the opcode of the binary operator is not commutative/associative
1836 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1837 return;
1839 // Skip if the binary operator is outside the current SCoP
1840 if (BinOp->getParent() != Store->getParent())
1841 return;
1843 // Skip if it is a multiplicative reduction and we disabled them
1844 if (DisableMultiplicativeReductions &&
1845 (BinOp->getOpcode() == Instruction::Mul ||
1846 BinOp->getOpcode() == Instruction::FMul))
1847 return;
1849 // Check the binary operator operands for a candidate load
1850 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1851 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1852 if (!PossibleLoad0 && !PossibleLoad1)
1853 return;
1855 // A load is only a candidate if it cannot escape (thus has only this use)
1856 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1857 if (PossibleLoad0->getParent() == Store->getParent())
1858 Loads.push_back(&getArrayAccessFor(PossibleLoad0));
1859 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1860 if (PossibleLoad1->getParent() == Store->getParent())
1861 Loads.push_back(&getArrayAccessFor(PossibleLoad1));
/// Check for reductions in this ScopStmt.
///
/// Iterate over all store memory accesses and check for valid binary reduction
/// like chains. For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reductions candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
void ScopStmt::checkForReductions() {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : MemAccs) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check if the remaining for overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(getDomain());
    isl::set AllAccs = AllAccsRel.range();

    // Any other access touching the same locations invalidates the candidate.
    for (MemoryAccess *MA : MemAccs) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel = MA->getAccessRelation().intersect_domain(getDomain());
      isl::set Accs = AccRel.range();

      // Only accesses in the same space can overlap at all.
      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
// Textual (isl) representation of the statement's iteration domain.
std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }

// Textual (isl) representation of the statement's schedule; empty string if
// no schedule is available.
std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule().release();
  if (!S)
    return {};
  auto Str = stringFromIslObj(S);
  // We took ownership via release(), so free the raw isl_map here.
  isl_map_free(S);
  return Str;
}
void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }

// For region statements the region's entry block stands in for the statement;
// block statements simply use their basic block.
BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

// Loop of the @p Dimension'th domain dimension (filled by
// collectSurroundingLoops).
Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

isl::set ScopStmt::getDomain() const { return Domain; }

isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }

isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
// Print the IR instructions belonging to this (block) statement, indented to
// match the surrounding ScopStmt::print output.
void ScopStmt::printInstructions(raw_ostream &OS) const {
  OS << "Instructions {\n";

  for (Instruction *Inst : Instructions)
    OS.indent(16) << *Inst << "\n";

  OS.indent(12) << "}\n";
}
// Print the statement: name, domain, schedule, memory accesses and,
// optionally, the covered instructions.
void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  // The schedule is only meaningful once a domain has been computed.
  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);

  // Only block statements carry an explicit instruction list.
  if (PrintInstructions && isBlockStmt())
    printInstructions(OS.indent(12));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print the statement (including instructions) to dbgs().
LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
#endif
// Remove @p MA from the per-kind lookup tables (value reads/writes, PHI
// reads/writes). The access itself stays in MemAccs; callers handle that.
void ScopStmt::removeAccessData(MemoryAccess *MA) {
  if (MA->isRead() && MA->isOriginalValueKind()) {
    bool Found = ValueReads.erase(MA->getAccessValue());
    (void)Found; // Silence unused-variable warning in NDEBUG builds.
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalValueKind()) {
    bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
}
2028 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
2029 // Remove the memory accesses from this statement together with all scalar
2030 // accesses that were caused by it. MemoryKind::Value READs have no access
2031 // instruction, hence would not be removed by this function. However, it is
2032 // only used for invariant LoadInst accesses, its arguments are always affine,
2033 // hence synthesizable, and therefore there are no MemoryKind::Value READ
2034 // accesses to be removed.
2035 auto Predicate = [&](MemoryAccess *Acc) {
2036 return Acc->getAccessInstruction() == MA->getAccessInstruction();
2038 for (auto *MA : MemAccs) {
2039 if (Predicate(MA)) {
2040 removeAccessData(MA);
2041 Parent.removeAccessData(MA);
2044 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
2045 MemAccs.end());
2046 InstructionToAccess.erase(MA->getAccessInstruction());
// Remove exactly one memory access (in contrast to removeMemoryAccess, which
// removes every access sharing the same access instruction).
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
  assert(MAIt != MemAccs.end());
  MemAccs.erase(MAIt);

  removeAccessData(MA);
  Parent.removeAccessData(MA);

  // Drop the instruction->access mapping entry; erase the whole entry once
  // the last access for that instruction is gone.
  auto It = InstructionToAccess.find(MA->getAccessInstruction());
  if (It != InstructionToAccess.end()) {
    It->second.remove(MA);
    if (It->second.empty())
      InstructionToAccess.erase(MA->getAccessInstruction());
  }
}
// Return the existing input access for @p V, or create a new
// MemoryKind::Value READ access for it in this statement.
MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
  MemoryAccess *Access = lookupInputAccessOf(V);
  if (Access)
    return Access;

  // Create (or look up) the scalar array modeling V and build a read access
  // to it; ownership of the raw MemoryAccess goes to the parent Scop.
  ScopArrayInfo *SAI =
      Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
  Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
                            true, {}, {}, V, MemoryKind::Value);
  Parent.addAccessFunction(Access);
  Access->buildAccessRelation(SAI);
  addAccess(Access);
  Parent.addAccessData(Access);
  return Access;
}
// Stream operator; instruction printing is controlled by the
// -polly-print-instructions option.
raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
  S.print(OS, PollyPrintInstructions);
  return OS;
}
//===----------------------------------------------------------------------===//
/// Scop class implement

// Replace the SCoP context; the new context is first aligned to the old
// context's parameter space so downstream isl operations keep working.
void Scop::setContext(__isl_take isl_set *NewContext) {
  NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
  isl_set_free(Context);
  Context = NewContext;
}
namespace {

/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  const ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
                                 ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  /// Convenience entry point: rewrite @p E using the value map @p VMap.
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             const ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  // Rewrite {Start,+,Step} as Start + {0,+,Step} so that a (possibly
  // remapped) start expression need not dominate the AddRec's loop.
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  // Replace values that have a representative in the map; others stay as-is.
  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

/// Check whether we should remap a SCEV expression.
struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
  const ValueToValueMap &VMap;
  bool FoundInside = false;
  const Scop *S;

public:
  SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
                     const Scop *S)
      : SCEVTraversal(*this), VMap(VMap), S(S) {}

  /// Return true iff @p E contains a sub-expression defined inside @p S that
  /// is not remapped by @p VMap.
  static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
                         const ValueToValueMap &VMap, const Scop *S) {
    SCEVFindInsideScop SFIS(VMap, SE, S);
    SFIS.visitAll(E);
    return SFIS.FoundInside;
  }

  bool follow(const SCEV *E) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
      FoundInside |= S->getRegion().contains(AddRec->getLoop());
    } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
      if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
        FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
    }
    // Stop descending as soon as a variant sub-expression was found.
    return !FoundInside;
  }

  bool isDone() { return FoundInside; }
};

} // end anonymous namespace
// Normalize @p E so that equivalent invariant loads are represented by a
// single class representative (see buildInvariantEquivalenceClasses).
const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
  // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
  // doesn't like addition between an AddRec and an expression that
  // doesn't have a dominance relationship with it.)
  if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
    return E;

  // Rewrite SCEV.
  return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}
2172 // This table of function names is used to translate parameter names in more
2173 // human-readable names. This makes it easier to interpret Polly analysis
2174 // results.
2175 StringMap<std::string> KnownNames = {
2176 {"_Z13get_global_idj", "global_id"},
2177 {"_Z12get_local_idj", "local_id"},
2178 {"_Z15get_global_sizej", "global_size"},
2179 {"_Z14get_local_sizej", "local_size"},
2180 {"_Z12get_work_dimv", "work_dim"},
2181 {"_Z17get_global_offsetj", "global_offset"},
2182 {"_Z12get_group_idj", "group_id"},
2183 {"_Z14get_num_groupsj", "num_groups"},
// Build a human-readable parameter name for a constant call: the (possibly
// demangled via KnownNames) callee name followed by its constant arguments.
static std::string getCallParamName(CallInst *Call) {
  std::string Result;
  raw_string_ostream OS(Result);
  std::string Name = Call->getCalledFunction()->getName();

  // Translate known OpenCL-style mangled names into readable ones.
  auto Iterator = KnownNames.find(Name);
  if (Iterator != KnownNames.end())
    Name = "__" + Iterator->getValue();
  OS << Name;
  // Callers ensure all arguments are constants (isConstCall).
  for (auto &Operand : Call->arg_operands()) {
    ConstantInt *Op = cast<ConstantInt>(&Operand);
    OS << "_" << Op->getValue();
  }
  OS.flush();
  return Result;
}
// Create and record an isl::id for @p Parameter, choosing the most
// informative name available (call signature, value name, load origin, or a
// numeric fallback "p_<idx>").
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  // Default: numbered parameter name.
  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();
    CallInst *Call = dyn_cast<CallInst>(Val);

    if (Call && isConstCall(Call)) {
      ParameterName = getCallParamName(Call);
    } else if (UseInstructionNames) {
      // If this parameter references a specific Value and this value has a name
      // we use this name as it is likely to be unique and more useful than just
      // a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        // Unnamed load: name the parameter after the (stripped) pointer it
        // was loaded from, if that pointer has a name.
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }
  }

  // Sanitize the name so isl accepts it.
  ParameterName = getIslCompatibleName("", ParameterName, "");

  // The id's user pointer is the SCEV itself, so it can be recovered later.
  isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
                              const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}
2239 void Scop::addParams(const ParameterSetTy &NewParameters) {
2240 for (const SCEV *Parameter : NewParameters) {
2241 // Normalize the SCEV to get the representing element for an invariant load.
2242 Parameter = extractConstantFactor(Parameter, *SE).second;
2243 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2245 if (Parameters.insert(Parameter))
2246 createParameterId(Parameter);
// Look up the isl::id created for @p Parameter (null id if unknown).
isl::id Scop::getIdForParam(const SCEV *Parameter) const {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return ParameterIds.lookup(Parameter);
}

// Intersect @p C with the parameter constraints under which at least one
// statement instance is executed.
isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains().release());
  return isl::manage(isl_set_intersect_params(C.release(), DomainContext));
}

// True iff @p BB dominates the SCoP's entry block.
bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}
// Harvest llvm.assume calls that affect this SCoP and fold the affine ones
// into the context as additional parameter constraints.
void Scop::addUserAssumptions(
    AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    // An assumption is relevant if it is inside the SCoP or dominates it.
    bool InScop = contains(CI);
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    // Build the condition sets for the assumed value; for out-of-SCoP
    // assumptions the SCoP context stands in for the domain.
    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
    auto *Dom = InScop ? DomainMap[BB].copy() : isl_set_copy(Context);
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
                                    InvalidDomainMap, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    // ConditionSets[0] holds the "assumption true" set, ConditionSets[1] the
    // "assumption false" set; derive the context restriction from them.
    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        // u-- compensates for the dimension just removed.
        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    Context = isl_set_intersect(Context, AssumptionCtx);
  }
}
// Intersect the context with the set provided via -polly-context, after
// validating that its parameter space matches the computed context.
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
  isl_space *Space = getParamSpace().release();
  // Reject a user context with a different number of parameters.
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    free(SpaceStr); // isl_*_to_str results are released with free().
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);

    // Reject if the i-th parameter names disagree.
    if (strcmp(NameContext, NameUserContext) != 0) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    // Names match: copy our dimension id onto the user context so the spaces
    // are identical for the intersection below.
    UserContext =
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
                           isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = isl_set_intersect(Context, UserContext);
  isl_space_free(Space);
}
// Group required invariant loads by (pointer SCEV, type); loads in the same
// class are interchangeable and are mapped to a single representative.
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      // A representative exists already; record the mapping for this load.
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    // First load for this (pointer, type) pair becomes the representative.
    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
// Initialize the three contexts over an empty (0-parameter) space; the last
// isl call consumes Space, the earlier ones operate on copies.
void Scop::buildContext() {
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
  Context = isl_set_universe(isl_space_copy(Space));
  InvalidContext = isl_set_empty(isl_space_copy(Space));
  AssumedContext = isl_set_universe(Space);
}
// Add the signed ranges ScalarEvolution knows for each parameter as bounds
// on the corresponding context dimension.
void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    // PDim tracks the parameter's dimension index in the context.
    Context =
        addRangeBoundsToSet(give(Context), SRange, PDim++, isl::dim::param)
            .release();
  }
}
2423 static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
2424 std::vector<isl::id> OutermostSizeIds;
2425 for (auto Array : Arrays) {
2426 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2427 // for its outermost dimension. Fortran arrays will have this since the
2428 // outermost dimension size can be picked up from their runtime description.
2429 // TODO: actually need to check if it has a FAD, but for now this works.
2430 if (Array->getNumberOfDimensions() > 0) {
2431 isl::pw_aff PwAff = Array->getDimensionSizePw(0);
2432 if (!PwAff)
2433 continue;
2435 isl::id Id =
2436 isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0));
2437 assert(!Id.is_null() &&
2438 "Invalid Id for PwAff expression in Fortran array");
2439 Id.dump();
2440 OutermostSizeIds.push_back(Id);
2443 return OutermostSizeIds;
2446 // The FORTRAN array size parameters are known to be non-negative.
2447 static isl_set *boundFortranArrayParams(__isl_give isl_set *Context,
2448 Scop::array_range Arrays) {
2449 std::vector<isl::id> OutermostSizeIds;
2450 OutermostSizeIds = getFortranArrayIds(Arrays);
2452 for (isl::id Id : OutermostSizeIds) {
2453 int dim = isl_set_find_dim_by_id(Context, isl_dim_param, Id.get());
2454 Context = isl_set_lower_bound_si(Context, isl_dim_param, dim, 0);
2457 return Context;
// Bring all parametric objects (context, statement domains, schedule) into a
// single, shared parameter space and add known parameter bounds.
void Scop::realignParams() {
  // NOTE(review): this early return skips the entire realignment, not only
  // the bound addition below — confirm that is the intended semantics of
  // -polly-ignore-parameter-bounds.
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl::space Space = getFullParamSpace();

  // Align the parameters of all data structures to the model.
  Context = isl_set_align_params(Context, Space.copy());

  // Bound the size of the fortran array dimensions.
  Context = boundFortranArrayParams(Context, arrays());

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();
  // Simplify the schedule according to the context too.
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext().release());
}
// Simplify @p AssumptionContext against the constraints already implied by
// @p S's domains and context.
static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    isl_set *DomainParameters = isl_union_set_params(S.getDomains().release());
    AssumptionContext =
        isl_set_gist_params(AssumptionContext, DomainParameters);
  }

  AssumptionContext =
      isl_set_gist_params(AssumptionContext, S.getContext().release());
  return AssumptionContext;
}
// Simplify the assumed/invalid contexts using the constraints implied by the
// iteration domains and the main context.
void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext =
      isl_set_align_params(InvalidContext, getParamSpace().release());
}
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// Returns isl::stat::error when the computation is considered too expensive
/// (too many disjuncts/parameters) or when isl hits its computation quota.
static isl::stat
buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;
  isl::ctx Ctx = Set.get_ctx();

  Set = Set.remove_divs();

  // Bail out early if the set is already too complex.
  if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
    return isl::stat::error;

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return isl::stat::error;
  }

  if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
    return isl::stat::error;

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  // isl aborts long-running computations via a quota; detect that here.
  if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
    return isl::stat::error;

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  MinMaxAccesses.push_back(std::make_pair(MinPMA.copy(), MaxPMA.copy()));

  return isl::stat::ok;
}
// Return @p MA's statement domain with all set dimensions projected out and
// the tuple id reset (i.e. just its parameter constraints, unnamed).
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain().release();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}
/// Wrapper function to calculate minimal/maximal accesses to each array.
///
/// Returns false if any per-set min/max computation failed (see
/// buildMinMaxAccess).
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = S.getDomains();
  isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());

  // Union all access relations of the group.
  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.add_map(give(MA->getAccessRelation().release()));

  // Restrict to actually executed statement instances and take the touched
  // locations.
  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();
  Locations = Locations.coalesce();
  Locations = Locations.detect_equalities();

  auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
    return buildMinMaxAccess(Set, MinMaxAccesses, S);
  };
  return Locations.foreach_set(Lambda) == isl::stat::ok;
}
2631 /// Helper to treat non-affine regions and basic blocks the same.
2633 ///{
2635 /// Return the block that is the representing block for @p RN.
2636 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2637 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2638 : RN->getNodeAs<BasicBlock>();
2641 /// Return the @p idx'th block that is executed after @p RN.
2642 static inline BasicBlock *
2643 getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2644 if (RN->isSubRegion()) {
2645 assert(idx == 0);
2646 return RN->getNodeAs<Region>()->getExit();
2648 return TI->getSuccessor(idx);
/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (i > 1024)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    //
    // NOTE(review): getPrevNode() returns the block preceding BB in the
    // function's block layout; the code assumes this layout neighbor is the
    // relevant predecessor -- confirm for the CFGs of interest.
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  // For a non-affine subregion start from the loop of its entry block and
  // walk upwards until the loop is no longer fully contained in the subregion.
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();
  return L;
}
2687 /// Get the number of blocks in @p L.
2689 /// The number of blocks in a loop are the number of basic blocks actually
2690 /// belonging to the loop, as well as all single basic blocks that the loop
2691 /// exits to and which terminate in an unreachable instruction. We do not
2692 /// allow such basic blocks in the exit of a scop, hence they belong to the
2693 /// scop and represent run-time conditions which we want to model and
2694 /// subsequently speculate away.
2696 /// @see getRegionNodeLoop for additional details.
2697 unsigned getNumBlocksInLoop(Loop *L) {
2698 unsigned NumBlocks = L->getNumBlocks();
2699 SmallVector<BasicBlock *, 4> ExitBlocks;
2700 L->getExitBlocks(ExitBlocks);
2702 for (auto ExitBlock : ExitBlocks) {
2703 if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2704 NumBlocks++;
2706 return NumBlocks;
2709 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2710 if (!RN->isSubRegion())
2711 return 1;
2713 Region *R = RN->getNodeAs<Region>();
2714 return std::distance(R->block_begin(), R->block_end());
2717 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2718 const DominatorTree &DT) {
2719 if (!RN->isSubRegion())
2720 return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2721 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2722 if (isErrorBlock(*BB, R, LI, DT))
2723 return true;
2724 return false;
2727 ///}
2729 static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
2730 unsigned Dim, Loop *L) {
2731 Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
2732 isl_id *DimId =
2733 isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
2734 return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
// Return the domain of @p Stmt by looking up the domain of its entry block.
isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}
isl::set Scop::getDomainConditions(BasicBlock *BB) const {
  // Fast path: the block has a domain recorded directly.
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return DIt->getSecond();

  // Otherwise @p BB lies inside a non-affine subregion for which only the
  // entry block carries a domain: walk out of every region that @p BB is the
  // entry of and reuse the domain of the enclosing region's entry block.
  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}
bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  // The entry domain is a universe set with one dimension per surrounding
  // loop (LD + 1 dimensions in total).
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));

  // Attach the corresponding loop to each dimension, walking from the
  // innermost dimension outwards (LD is decremented before the body runs, so
  // LD + 1 is the dimension for the current loop L).
  while (LD-- >= 0) {
    S = addDomainDimId(S, LD + 1, L);
    L = L->getParentLoop();
  }

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  DomainMap[EntryBB] = isl::manage(S);

  // A SCoP that is a single non-affine region is modeled with the entry
  // domain only; it is invalid if it contains an error block.
  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
    return false;

  return true;
}
/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible to domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
                                                  __isl_take isl_set *Dom,
                                                  Loop *OldL, Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left were difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    // Replace the dimension of the loop that was left with a fresh dimension
    // for the loop that was entered.
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    // Append a single new dimension for the loop that was entered.
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else {
    assert(OldDepth > NewDepth);
    // Project out the innermost dimensions of all loops that were left.
    int Diff = OldDepth - NewDepth;
    int NumDim = isl_set_n_dim(Dom);
    assert(NumDim >= Diff);
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
  }

  return Dom;
}
bool Scop::propagateInvalidStmtDomains(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    // A block is invalid if it is an error block or if its whole domain is
    // already known to be invalid.
    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      // The block is never validly executed: record a restriction assumption
      // excluding all parameter values that reach it and drop its domain.
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(ERRORBLOCK, DomPar.release(),
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = nullptr;
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    // Propagate the invalid domain of this block to all successors that lie
    // inside the SCoP (skipping backedges).
    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      // The successor may live at a different loop depth, adjust first.
      auto *AdjustedInvalidDomain = adjustDomainDimensions(
          *this, InvalidDomain.copy(), BBLoop, SuccBBLoop);

      auto *SuccInvalidDomain = InvalidDomainMap[SuccBB].copy();
      SuccInvalidDomain =
          isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
      SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
      unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);

      InvalidDomainMap[SuccBB] = isl::manage(SuccInvalidDomain);

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (NumConjucts < MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void Scop::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate it's
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = R.getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  // NOTE(review): the latches are queried from BBLoop on every iteration of
  // this walk over L, not from L itself -- confirm this is intended.
  auto *L = BBLoop;
  while (L && contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = DomainMap[BB];
  assert(Domain && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = isl::manage(
      adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop));
  isl::set &ExitDomain = DomainMap[ExitBB];

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  // Remember that the exit block already received a domain from @p BB.
  FinishedExitBlocks.insert(ExitBB);
}
bool Scop::buildDomainsWithBranchConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
                                               InvalidDomainMap))
          return false;
        continue;
      }
    }

    // Remember for later that this SCoP contains an error block.
    if (containsErrorBlock(RN, getRegion(), LI, DT))
      HasErrorBlock = true;

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    TerminatorInst *TI = BB->getTerminator();

    // An unreachable terminator has no successors to propagate to.
    if (isa<UnreachableInst>(TI))
      continue;

    isl::set Domain = DomainMap.lookup(BB);
    if (!Domain)
      continue;
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
                                 InvalidDomainMap, ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      // The successor may live at a different loop depth, adjust first.
      CondSet = isl::manage(
          adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop));

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = DomainMap[SuccBB];

      if (SuccDomain) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (isl_set_n_basic_set(SuccDomain.get()) < MaxDisjunctsInDomain)
        continue;

      // Free the not yet consumed condition sets before bailing out.
      invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
3101 isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
3102 DominatorTree &DT,
3103 LoopInfo &LI) {
3104 // If @p BB is the ScopEntry we are done
3105 if (R.getEntry() == BB)
3106 return isl::set::universe(Domain.get_space());
3108 // The region info of this function.
3109 auto &RI = *R.getRegionInfo();
3111 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
3113 // A domain to collect all predecessor domains, thus all conditions under
3114 // which the block is executed. To this end we start with the empty domain.
3115 isl::set PredDom = isl::set::empty(Domain.get_space());
3117 // Set of regions of which the entry block domain has been propagated to BB.
3118 // all predecessors inside any of the regions can be skipped.
3119 SmallSet<Region *, 8> PropagatedRegions;
3121 for (auto *PredBB : predecessors(BB)) {
3122 // Skip backedges.
3123 if (DT.dominates(BB, PredBB))
3124 continue;
3126 // If the predecessor is in a region we used for propagation we can skip it.
3127 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
3128 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
3129 PredBBInRegion)) {
3130 continue;
3133 // Check if there is a valid region we can use for propagation, thus look
3134 // for a region that contains the predecessor and has @p BB as exit block.
3135 auto *PredR = RI.getRegionFor(PredBB);
3136 while (PredR->getExit() != BB && !PredR->contains(BB))
3137 PredR->getParent();
3139 // If a valid region for propagation was found use the entry of that region
3140 // for propagation, otherwise the PredBB directly.
3141 if (PredR->getExit() == BB) {
3142 PredBB = PredR->getEntry();
3143 PropagatedRegions.insert(PredR);
3146 auto *PredBBDom = getDomainConditions(PredBB).release();
3147 Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
3149 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
3151 PredDom = PredDom.unite(isl::manage(PredBBDom));
3154 return PredDom;
bool Scop::propagateDomainConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constrains from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(getParamSpace());

    // Add loop-carried constraints if this block is a loop header of a loop
    // contained in the SCoP.
    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
        return false;
  }

  return true;
}
3200 /// Create a map to map from a given iteration to a subsequent iteration.
3202 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3203 /// is incremented by one and all other dimensions are equal, e.g.,
3204 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3206 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3207 static __isl_give isl_map *
3208 createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
3209 auto *MapSpace = isl_space_map_from_set(SetSpace);
3210 auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
3211 for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
3212 if (u != Dim)
3213 NextIterationMap =
3214 isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
3215 auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
3216 C = isl_constraint_set_constant_si(C, 1);
3217 C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
3218 C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
3219 NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
3220 return NextIterationMap;
3223 bool Scop::addLoopBoundsToHeaderDomain(
3224 Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3225 int LoopDepth = getRelativeLoopDepth(L);
3226 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
3228 BasicBlock *HeaderBB = L->getHeader();
3229 assert(DomainMap.count(HeaderBB));
3230 isl::set &HeaderBBDom = DomainMap[HeaderBB];
3232 isl::map NextIterationMap = isl::manage(
3233 createNextIterationMap(HeaderBBDom.get_space().release(), LoopDepth));
3235 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
3237 SmallVector<BasicBlock *, 4> LatchBlocks;
3238 L->getLoopLatches(LatchBlocks);
3240 for (BasicBlock *LatchBB : LatchBlocks) {
3241 // If the latch is only reachable via error statements we skip it.
3242 isl::set LatchBBDom = DomainMap.lookup(LatchBB);
3243 if (!LatchBBDom)
3244 continue;
3246 isl::set BackedgeCondition = nullptr;
3248 TerminatorInst *TI = LatchBB->getTerminator();
3249 BranchInst *BI = dyn_cast<BranchInst>(TI);
3250 assert(BI && "Only branch instructions allowed in loop latches");
3252 if (BI->isUnconditional())
3253 BackedgeCondition = LatchBBDom;
3254 else {
3255 SmallVector<isl_set *, 8> ConditionSets;
3256 int idx = BI->getSuccessor(0) != HeaderBB;
3257 if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
3258 InvalidDomainMap, ConditionSets))
3259 return false;
3261 // Free the non back edge condition set as we do not need it.
3262 isl_set_free(ConditionSets[1 - idx]);
3264 BackedgeCondition = isl::manage(ConditionSets[idx]);
3267 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3268 assert(LatchLoopDepth >= LoopDepth);
3269 BackedgeCondition = BackedgeCondition.project_out(
3270 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3271 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3274 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
3275 for (int i = 0; i < LoopDepth; i++)
3276 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3278 isl::set UnionBackedgeConditionComplement =
3279 UnionBackedgeCondition.complement();
3280 UnionBackedgeConditionComplement =
3281 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
3283 UnionBackedgeConditionComplement =
3284 UnionBackedgeConditionComplement.apply(ForwardMap);
3285 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3286 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3288 auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth);
3289 HeaderBBDom = isl::manage(Parts.second);
3291 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3292 // the bounded assumptions to the context as they are already implied by the
3293 // <nsw> tag.
3294 if (Affinator.hasNSWAddRecForLoop(L)) {
3295 isl_set_free(Parts.first);
3296 return true;
3299 isl_set *UnboundedCtx = isl_set_params(Parts.first);
3300 recordAssumption(INFINITELOOP, UnboundedCtx,
3301 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3302 return true;
3305 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3306 Value *PointerBase = MA->getOriginalBaseAddr();
3308 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3309 if (!PointerBaseInst)
3310 return nullptr;
3312 auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3313 if (!BasePtrStmt)
3314 return nullptr;
3316 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3319 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3320 isl::union_map Writes) {
3321 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3322 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3325 Value *BaseAddr = MA->getOriginalBaseAddr();
3326 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3327 if (!isa<LoadInst>(BasePtrInst))
3328 return contains(BasePtrInst);
3330 return false;
bool Scop::buildAliasChecks(AliasAnalysis &AA) {
  // Without run-time alias checks there is nothing to build.
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups(AA)) {
    // Aliasing assumptions do not go through addAssumption but we still want to
    // collect statistics so we do it here explicitly.
    if (MinMaxAliasGroups.size())
      AssumptionsAliasing++;
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  invalidate(ALIASING, DebugLoc());

  DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
               << " could not be created as the number of parameters involved "
                  "is too high. The SCoP will be "
                  "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                  "the maximal number of parameters but be advised that the "
                  "compile time might increase exponentially.\n\n");
  return false;
}
std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *this) {

    isl_set *StmtDomain = Stmt.getDomain().release();
    bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
    isl_set_free(StmtDomain);

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      // Scalar accesses do not touch memory and cannot alias.
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      // The read side of a memory transfer is keyed by its raw source pointer.
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    // Must-alias sets and forwarding sets need no run-time check.
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    // A group with a single access has nothing to alias with.
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  // Split each alias group by execution domain: accesses whose domains are
  // disjoint from the rest of the group can never be executed together and
  // are moved into a new group, which is in turn split again by this loop.
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl_set *AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl_set *MADomain = getAccessDomain(MA);
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
        // Disjoint from everything accumulated so far: move to the new group.
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
        isl_set_free(MADomain);
      } else {
        // Overlaps: keep it and grow the accumulated domain.
        AGDomain = isl_set_union(AGDomain, MADomain);
        AGI++;
      }
    }
    // Groups of size one need no alias check and are dropped.
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
    isl_set_free(AGDomain);
  }
}
bool Scop::buildAliasGroups(AliasAnalysis &AA) {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute minimal
  //      and maximal accesses to each array of a group in read only and non
  //      read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!hasFeasibleRuntimeContext())
      return false;

    {
      // Bound the isl operations spent on this group; the guard resets the
      // limit when it goes out of scope.
      IslMaxOperationsGuard MaxOpGuard(getIslCtx(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }

    // Bail if the operation quota was exhausted while building the group.
    if (isl_ctx_last_error(getIslCtx()) == isl_error_quota) {
      invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}
bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  // A single access cannot alias with anything else in the group.
  if (AliasGroup.size() < 2)
    return true;

  // Partition the group into read-only and read-write accesses/arrays and
  // remark each potentially aliasing pointer.
  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write pointers,
  // no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                 MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
      addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  MinMaxAliasGroups.emplace_back();
  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;

  bool Valid;

  Valid =
      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid =
      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);

  if (!Valid)
    return false;

  return true;
}
3540 /// Get the smallest loop that contains @p S but is not in @p S.
///
/// @returns The surrounding loop, or nullptr if the SCoP is not nested in
///          any loop.
3541 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3542 // Start with the smallest loop containing the entry and expand that
3543 // loop until it contains all blocks in the region. If there is a loop
3544 // containing all blocks in the region check if it is itself contained
3545 // and if so take the parent loop as it will be the smallest containing
3546 // the region but not contained by it.
3547 Loop *L = LI.getLoopFor(S.getEntry());
3548 while (L) {
3549 bool AllContained = true;
3550 for (auto *BB : S.blocks())
3551 AllContained &= L->contains(BB);
3552 if (AllContained)
3553 break;
3554 L = L->getParentLoop();
// If the found loop lies completely inside the SCoP, its parent is the
// smallest loop surrounding (but not inside) the SCoP.
3557 return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
// SCoP IDs are handed out sequentially per function; see getNextID().
3560 int Scop::NextScopID = 0;
// Name of the function whose SCoPs are currently being numbered.
3562 std::string Scop::CurrentFunc;
/// Return the next unique SCoP ID, restarting the counter at 0 whenever a
/// different function is entered.
3564 int Scop::getNextID(std::string ParentFunc) {
3565 if (ParentFunc != CurrentFunc) {
3566 CurrentFunc = ParentFunc;
3567 NextScopID = 0;
3569 return NextScopID++;
/// Construct a SCoP for region @p R. Allocates a fresh isl context (owned
/// via the IslCtx unique_ptr-style member) and builds the initial context.
/// Note: HasSingleExitEdge is derived from R.getExitingBlock() being
/// non-null, i.e. the region having a single exiting block.
3572 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3573 ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE)
3574 : SE(&ScalarEvolution), R(R), name(R.getNameStr()),
3575 HasSingleExitEdge(R.getExitingBlock()), DC(DC), ORE(ORE),
3576 IslCtx(isl_ctx_alloc(), isl_ctx_free), Affinator(this, LI),
3577 ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
// For debugging: make isl abort at the point an error occurs.
3578 if (IslOnErrorAbort)
3579 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
3580 buildContext();
/// Destructor: free every isl object owned by the SCoP before the isl
/// context itself is released (by the IslCtx member's deleter).
3583 Scop::~Scop() {
3584 isl_set_free(Context);
3585 isl_set_free(AssumedContext);
3586 isl_set_free(InvalidContext);
3587 isl_schedule_free(Schedule);
3589 ParameterIds.clear();
// Assumptions that were recorded but never materialized still own sets.
3591 for (auto &AS : RecordedAssumptions)
3592 isl_set_free(AS.Set);
3594 // Free the alias groups
3595 for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
3596 for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
3597 isl_pw_multi_aff_free(MMA.first);
3598 isl_pw_multi_aff_free(MMA.second);
3600 for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
3601 isl_pw_multi_aff_free(MMA.first);
3602 isl_pw_multi_aff_free(MMA.second);
3606 for (const auto &IAClass : InvariantEquivClasses)
3607 isl_set_free(IAClass.ExecutionContext);
3609 // Explicitly release all Scop objects and the underlying isl objects before
3610 // we release the isl context.
3611 Stmts.clear();
3612 ScopArrayInfoSet.clear();
3613 ScopArrayInfoMap.clear();
3614 ScopArrayNameMap.clear();
3615 AccessFunctions.clear();
/// Fold constant strides found in outer array dimensions into the size of
/// the innermost (rightmost) dimension, rewriting all affected access
/// relations accordingly.
///
/// For every multi-dimensional array we inspect the accessed elements of
/// each dimension separately: if the affine hull of a dimension exposes a
/// single division, its denominator is the constant stride; a dimension only
/// accessed at offset zero is marked with factor 0. Folding only happens if
/// all dimensions agree on one factor greater than one.
3618 void Scop::foldSizeConstantsToRight() {
3619 isl_union_set *Accessed = isl_union_map_range(getAccesses().release());
3621 for (auto Array : arrays()) {
// One-dimensional arrays have no outer dimension to fold from.
3622 if (Array->getNumberOfDimensions() <= 1)
3623 continue;
3625 isl_space *Space = Array->getSpace().release();
3627 Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
// Skip arrays that are never accessed.
3629 if (!isl_union_set_contains(Accessed, Space)) {
3630 isl_space_free(Space);
3631 continue;
3634 isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
// Transform maps old array indices to folded ones; constraints are added
// per dimension below.
3636 isl_map *Transform =
3637 isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));
// Int[i] holds the detected stride factor of dimension i
// (1 = unknown/unit, 0 = only offset zero accessed).
3639 std::vector<int> Int;
3641 int Dims = isl_set_dim(Elements, isl_dim_set);
3642 for (int i = 0; i < Dims; i++) {
// Project onto dimension i only and anchor it at zero before taking the
// affine hull.
3643 isl_set *DimOnly =
3644 isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3645 DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3646 DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3648 isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
// The innermost dimension is kept unchanged; it receives the folded
// factor later via its size.
3650 if (i == Dims - 1) {
3651 Int.push_back(1);
3652 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3653 isl_basic_set_free(DimHull);
3654 continue;
// A single existential (div) in the hull exposes the stride as the
// denominator of the division.
3657 if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3658 isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3659 isl_val *Val = isl_aff_get_denominator_val(Diff);
3660 isl_aff_free(Diff);
3662 int ValInt = 1;
3664 if (isl_val_is_int(Val))
3665 ValInt = isl_val_get_num_si(Val);
3666 isl_val_free(Val);
3668 Int.push_back(ValInt);
// Constrain the transform: out_i * ValInt == in_i, i.e. divide the old
// index by the stride.
3670 isl_constraint *C = isl_constraint_alloc_equality(
3671 isl_local_space_from_space(isl_map_get_space(Transform)));
3672 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3673 C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3674 Transform = isl_map_add_constraint(Transform, C);
3675 isl_basic_set_free(DimHull);
3676 continue;
// Otherwise check whether the dimension is only ever accessed at zero.
3679 isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3680 ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3682 int ValInt = 1;
3683 if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3684 ValInt = 0;
3687 Int.push_back(ValInt);
3688 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3689 isl_basic_set_free(DimHull);
3690 isl_basic_set_free(ZeroSet);
// The transform must cover every accessed element, otherwise folding
// would lose accesses.
3693 isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3695 if (!isl_set_is_subset(Elements, MappedElements)) {
3696 isl_set_free(Elements);
3697 isl_set_free(MappedElements);
3698 isl_map_free(Transform);
3699 continue;
3702 isl_set_free(MappedElements);
// Folding is only sound if there is a common factor > 1 shared by all
// dimensions that exposed one (factor 0 dimensions are compatible).
3704 bool CanFold = true;
3706 if (Int[0] <= 1)
3707 CanFold = false;
3709 unsigned NumDims = Array->getNumberOfDimensions();
3710 for (unsigned i = 1; i < NumDims - 1; i++)
3711 if (Int[0] != Int[i] && Int[i])
3712 CanFold = false;
3714 if (!CanFold) {
3715 isl_set_free(Elements);
3716 isl_map_free(Transform);
3717 continue;
// Rewrite every access to this array through the transform.
3720 for (auto &Access : AccessFunctions)
3721 if (Access->getScopArrayInfo() == Array)
3722 Access->setAccessRelation(Access->getAccessRelation().apply_range(
3723 isl::manage(isl_map_copy(Transform))));
3725 isl_map_free(Transform);
// Multiply the folded factor into the innermost dimension's size.
3727 std::vector<const SCEV *> Sizes;
3728 for (unsigned i = 0; i < NumDims; i++) {
3729 auto Size = Array->getDimensionSize(i);
3731 if (i == NumDims - 1)
3732 Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3733 Sizes.push_back(Size);
3736 Array->updateSizes(Sizes, false /* CheckConsistency */);
3738 isl_set_free(Elements);
3740 isl_union_set_free(Accessed);
/// Attach Fortran array descriptors to the ScopArrayInfo objects of all
/// memory accesses that carry one.
3743 void Scop::markFortranArrays() {
3744 for (ScopStmt &Stmt : Stmts) {
3745 for (MemoryAccess *MemAcc : Stmt) {
3746 Value *FAD = MemAcc->getFortranArrayDescriptor();
3747 if (!FAD)
3748 continue;
3750 // TODO: const_cast-ing to edit
3751 ScopArrayInfo *SAI =
3752 const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3753 assert(SAI && "memory access into a Fortran array does not "
3754 "have an associated ScopArrayInfo");
3755 SAI->applyAndSetFAD(FAD);
/// Run the post-construction access pipeline. The order is significant:
/// dimensionality is fixed first, constant size factors are folded before
/// access relations are folded, and Fortran arrays are marked last.
3760 void Scop::finalizeAccesses() {
3761 updateAccessDimensionality();
3762 foldSizeConstantsToRight();
3763 foldAccessRelations();
3764 assumeNoOutOfBounds();
3765 markFortranArrays();
/// Shrink the element type of single-dimensional arrays to the largest
/// power-of-two size that divides all subscripts, then update each access's
/// dimensionality.
3768 void Scop::updateAccessDimensionality() {
3769 // Check all array accesses for each base pointer and find a (virtual) element
3770 // size for the base pointer that divides all access functions.
3771 for (ScopStmt &Stmt : *this)
3772 for (MemoryAccess *Access : Stmt) {
3773 if (!Access->isArrayKind())
3774 continue;
3775 ScopArrayInfo *Array =
3776 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3778 if (Array->getNumberOfDimensions() != 1)
3779 continue;
// Halve the candidate element size until it divides the subscript.
3780 unsigned DivisibleSize = Array->getElemSizeInBytes();
3781 const SCEV *Subscript = Access->getSubscript(0);
3782 while (!isDivisible(Subscript, DivisibleSize, *SE))
3783 DivisibleSize /= 2;
3784 auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3785 Array->updateElementType(Ty);
3788 for (auto &Stmt : *this)
3789 for (auto &Access : Stmt)
3790 Access->updateDimensionality();
/// Fold the access relation of every memory access in the SCoP.
3793 void Scop::foldAccessRelations() {
3794 for (auto &Stmt : *this)
3795 for (auto &Access : Stmt)
3796 Access->foldAccessRelation();
/// Record the assumption that every memory access stays in bounds.
3799 void Scop::assumeNoOutOfBounds() {
3800 for (auto &Stmt : *this)
3801 for (auto &Access : Stmt)
3802 Access->assumeNoOutOfBound();
/// Remove @p Stmt from the block and instruction lookup maps. Region
/// statements cover multiple basic blocks, so all of them are erased.
3805 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3806 if (Stmt.isRegionStmt())
3807 for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
3808 StmtMap.erase(BB);
3809 for (Instruction &Inst : *BB)
3810 InstStmtMap.erase(&Inst);
3812 else {
3813 StmtMap.erase(Stmt.getBasicBlock());
3814 for (Instruction *Inst : Stmt.getInstructions())
3815 InstStmtMap.erase(Inst);
/// Erase every statement for which @p ShouldDelete returns true, keeping the
/// lookup maps in sync.
3819 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
3820 for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3821 if (!ShouldDelete(*StmtIt)) {
3822 StmtIt++;
3823 continue;
3826 removeFromStmtMap(*StmtIt);
3827 StmtIt = Stmts.erase(StmtIt);
/// Drop statements whose entry block has no entry in the domain map, i.e.
/// statements for which no domain could be derived.
3831 void Scop::removeStmtNotInDomainMap() {
3832 auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3833 return !this->DomainMap.lookup(Stmt.getEntryBlock());
3835 removeStmts(ShouldDelete);
/// Remove statements that carry no information: empty statements always,
/// and — once invariant load hoisting has run — statements that only read.
///
/// @param AfterHoisting Whether invariant load hoisting already happened;
///        read-only statements must not be removed before it, as their
///        loads may still be hoisted.
3838 void Scop::simplifySCoP(bool AfterHoisting) {
3839 auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3840 bool RemoveStmt = Stmt.isEmpty();
3842 // Remove read only statements only after invariant load hoisting.
3843 if (!RemoveStmt && AfterHoisting) {
3844 bool OnlyRead = true;
3845 for (MemoryAccess *MA : Stmt) {
3846 if (MA->isRead())
3847 continue;
3849 OnlyRead = false;
3850 break;
3853 RemoveStmt = OnlyRead;
3855 return RemoveStmt;
3858 removeStmts(ShouldDelete);
/// Find the invariant-load equivalence class that contains @p Val, or
/// nullptr if @p Val is not an invariant load of this SCoP.
3861 InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3862 LoadInst *LInst = dyn_cast<LoadInst>(Val);
3863 if (!LInst)
3864 return nullptr;
// The load may have been replaced by the class representative during
// consolidation; look it up first.
3866 if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3867 LInst = cast<LoadInst>(Rep);
3869 Type *Ty = LInst->getType();
3870 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3871 for (auto &IAClass : InvariantEquivClasses) {
// Classes are keyed by (pointer SCEV, loaded type).
3872 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3873 continue;
3875 auto &MAs = IAClass.InvariantAccesses;
3876 for (auto *MA : MAs)
3877 if (MA->getAccessInstruction() == Val)
3878 return &IAClass;
3881 return nullptr;
/// Check whether @p maybeParam is one of the formal arguments of @p F.
3884 bool isAParameter(llvm::Value *maybeParam, const Function &F) {
3885 for (const llvm::Argument &Arg : F.args())
3886 if (&Arg == maybeParam)
3887 return true;
3889 return false;
/// Decide whether the invariant load @p MA may be hoisted unconditionally,
/// i.e. executed speculatively without a guarding execution context.
///
/// @param StmtInvalidCtxIsEmpty  The statement's invalid context is empty.
/// @param MAInvalidCtxIsEmpty    The access's invalid context is empty.
/// @param NonHoistableCtxIsEmpty The location is never overwritten.
3892 bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3893 bool MAInvalidCtxIsEmpty,
3894 bool NonHoistableCtxIsEmpty) {
3895 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3896 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
// Under this option, loads from function parameters are assumed safe.
3897 if (PollyAllowDereferenceOfAllFunctionParams &&
3898 isAParameter(LInst->getPointerOperand(), getFunction()))
3899 return true;
3901 // TODO: We can provide more information for better but more expensive
3902 // results.
3903 if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3904 LInst->getAlignment(), DL))
3905 return false;
3907 // If the location might be overwritten we do not hoist it unconditionally.
3909 // TODO: This is probably too conservative.
3910 if (!NonHoistableCtxIsEmpty)
3911 return false;
3913 // If a dereferenceable load is in a statement that is modeled precisely we
3914 // can hoist it.
3915 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3916 return true;
3918 // Even if the statement is not modeled precisely we can hoist the load if it
3919 // does not involve any parameters that might have been specialized by the
3920 // statement domain.
3921 for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3922 if (!isa<SCEVConstant>(MA->getSubscript(u)))
3923 return false;
3924 return true;
/// Transfer the hoisted invariant accesses @p InvMAs of @p Stmt into the
/// SCoP-wide invariant-load equivalence classes, consolidating loads of the
/// same location and unifying their execution contexts.
3927 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3928 if (InvMAs.empty())
3929 return;
3931 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3932 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3934 // Get the context under which the statement is executed but remove the error
3935 // context under which this statement is reached.
3936 isl::set DomainCtx = Stmt.getDomain().params();
3937 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
// Too many disjuncts would make the preload guard too expensive; give up.
3939 if (isl_set_n_basic_set(DomainCtx.get()) >= MaxDisjunctsInDomain) {
3940 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3941 invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3942 return;
3945 // Project out all parameters that relate to loads in the statement. Otherwise
3946 // we could have cyclic dependences on the constraints under which the
3947 // hoisted loads are executed and we could not determine an order in which to
3948 // pre-load them. This happens because not only lower bounds are part of the
3949 // domain but also upper bounds.
3950 for (auto &InvMA : InvMAs) {
3951 auto *MA = InvMA.MA;
3952 Instruction *AccInst = MA->getAccessInstruction();
3953 if (SE->isSCEVable(AccInst->getType())) {
3954 SetVector<Value *> Values;
3955 for (const SCEV *Parameter : Parameters) {
3956 Values.clear();
3957 findValues(Parameter, *SE, Values);
3958 if (!Values.count(AccInst))
3959 continue;
3961 if (isl::id ParamId = getIdForParam(Parameter)) {
3962 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3963 if (Dim >= 0)
3964 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3970 for (auto &InvMA : InvMAs) {
3971 auto *MA = InvMA.MA;
3972 isl::set NHCtx = InvMA.NonHoistableCtx;
3974 // Check for another invariant access that accesses the same location as
3975 // MA and if found consolidate them. Otherwise create a new equivalence
3976 // class at the end of InvariantEquivClasses.
3977 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3978 Type *Ty = LInst->getType();
3979 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3981 isl::set MAInvalidCtx = MA->getInvalidContext();
3982 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3983 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
// MACtx is the execution context of the hoisted load: universal if it can
// be speculated, otherwise the domain minus invalid/non-hoistable parts.
3985 isl::set MACtx;
3986 // Check if we know that this pointer can be speculatively accessed.
3987 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3988 NonHoistableCtxIsEmpty)) {
3989 MACtx = isl::set::universe(DomainCtx.get_space());
3990 } else {
3991 MACtx = DomainCtx;
3992 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3993 MACtx = MACtx.gist_params(getContext());
3996 bool Consolidated = false;
3997 for (auto &IAClass : InvariantEquivClasses) {
3998 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3999 continue;
4001 // If the pointer and the type is equal check if the access function wrt.
4002 // to the domain is equal too. It can happen that the domain fixes
4003 // parameter values and these can be different for distinct part of the
4004 // SCoP. If this happens we cannot consolidate the loads but need to
4005 // create a new invariant load equivalence class.
4006 auto &MAs = IAClass.InvariantAccesses;
4007 if (!MAs.empty()) {
4008 auto *LastMA = MAs.front();
4010 isl::set AR = MA->getAccessRelation().range();
4011 isl::set LastAR = LastMA->getAccessRelation().range();
4012 bool SameAR = AR.is_equal(LastAR);
4014 if (!SameAR)
4015 continue;
4018 // Add MA to the list of accesses that are in this class.
4019 MAs.push_front(MA);
4021 Consolidated = true;
4023 // Unify the execution context of the class and this statement.
4024 isl::set IAClassDomainCtx = isl::manage(IAClass.ExecutionContext);
4025 if (IAClassDomainCtx)
4026 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
4027 else
4028 IAClassDomainCtx = MACtx;
4029 IAClass.ExecutionContext = IAClassDomainCtx.release();
4030 break;
4033 if (Consolidated)
4034 continue;
4036 // If we did not consolidate MA, thus did not find an equivalence class
4037 // for it, we create a new one.
4038 InvariantEquivClasses.emplace_back(InvariantEquivClassTy{
4039 PointerSCEV, MemoryAccessList{MA}, MACtx.release(), Ty});
4043 /// Check if an access range is too complex.
4045 /// An access range is too complex, if it contains either many disjuncts or
4046 /// very complex expressions. As a simple heuristic, we assume if a set to
4047 /// be too complex if the sum of existentially quantified dimensions and
4048 /// set dimensions is larger than a threshold. This reliably detects both
4049 /// sets with many disjuncts as well as sets with many divisions as they
4050 /// arise in h264.
4052 /// @param AccessRange The range to check for complexity.
4054 /// @returns True if the access range is too complex.
4055 static bool isAccessRangeTooComplex(isl::set AccessRange) {
4056 unsigned NumTotalDims = 0;
// Sum div (existential) and set dimensions over all disjuncts.
4058 auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
4059 NumTotalDims += BSet.dim(isl::dim::div);
4060 NumTotalDims += BSet.dim(isl::dim::set);
4061 return isl::stat::ok;
4064 AccessRange.foreach_basic_set(CountDimensions);
// MaxDimensionsInAccessRange is the command-line configurable threshold.
4066 if (NumTotalDims > MaxDimensionsInAccessRange)
4067 return true;
4069 return false;
/// Compute the context under which @p Access is NOT invariant (i.e. its
/// location is written), or a null set if the access cannot be hoisted at
/// all. An empty (but non-null) result means the load is never overwritten
/// and thus unconditionally invariant.
4072 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
4073 // TODO: Loads that are not loop carried, hence are in a statement with
4074 // zero iterators, are by construction invariant, though we
4075 // currently "hoist" them anyway. This is necessary because we allow
4076 // them to be treated as parameters (e.g., in conditions) and our code
4077 // generation would otherwise use the old value.
4079 auto &Stmt = *Access->getStatement();
4080 BasicBlock *BB = Stmt.getEntryBlock();
// Only affine read accesses of plain loads are hoisting candidates.
4082 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
4083 Access->isMemoryIntrinsic())
4084 return nullptr;
4086 // Skip accesses that have an invariant base pointer which is defined but
4087 // not loaded inside the SCoP. This can happened e.g., if a readnone call
4088 // returns a pointer that is used as a base address. However, as we want
4089 // to hoist indirect pointers, we allow the base pointer to be defined in
4090 // the region if it is also a memory access. Each ScopArrayInfo object
4091 // that has a base pointer origin has a base pointer that is loaded and
4092 // that it is invariant, thus it will be hoisted too. However, if there is
4093 // no base pointer origin we check that the base pointer is defined
4094 // outside the region.
4095 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
4096 if (hasNonHoistableBasePtrInScop(Access, Writes))
4097 return nullptr;
4099 isl::map AccessRelation = give(Access->getAccessRelation().release());
4100 assert(!AccessRelation.is_empty());
// A relation that depends on the loop iterators is not invariant.
4102 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
4103 return nullptr;
4105 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
4106 isl::set SafeToLoad;
4108 auto &DL = getFunction().getParent()->getDataLayout();
4109 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
4110 DL)) {
4111 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
4112 } else if (BB != LI->getParent()) {
4113 // Skip accesses in non-affine subregions as they might not be executed
4114 // under the same condition as the entry of the non-affine subregion.
4115 return nullptr;
4116 } else {
4117 SafeToLoad = AccessRelation.range();
4120 if (isAccessRangeTooComplex(AccessRelation.range()))
4121 return nullptr;
// Parameter context under which the accessed range is (also) written.
4123 isl::union_map Written = Writes.intersect_range(SafeToLoad);
4124 isl::set WrittenCtx = Written.params();
4125 bool IsWritten = !WrittenCtx.is_empty();
// Never written: empty context, unconditionally hoistable.
4127 if (!IsWritten)
4128 return WrittenCtx;
4130 WrittenCtx = WrittenCtx.remove_divs();
4131 bool TooComplex =
4132 isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
// Only required invariant loads justify restricting the context further.
4133 if (TooComplex || !isRequiredInvariantLoad(LI))
4134 return nullptr;
4136 addAssumption(INVARIANTLOAD, WrittenCtx.copy(), LI->getDebugLoc(),
4137 AS_RESTRICTION, LI->getParent());
4138 return WrittenCtx;
/// Verify that every required invariant load was actually hoisted; if one is
/// still accessed by a statement, invalidate the SCoP.
4141 void Scop::verifyInvariantLoads() {
4142 auto &RIL = getRequiredInvariantLoads();
4143 for (LoadInst *LI : RIL) {
4144 assert(LI && contains(LI));
4145 // If there exists a statement in the scop which has a memory access for
4146 // @p LI, then mark this scop as infeasible for optimization.
4147 for (ScopStmt &Stmt : Stmts)
4148 if (Stmt.getArrayAccessOrNULLFor(LI)) {
4149 invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
4150 return;
/// Hoist invariant loads out of their statements into SCoP-level invariant
/// equivalence classes (no-op unless -polly-invariant-load-hoisting).
4155 void Scop::hoistInvariantLoads() {
4156 if (!PollyInvariantLoadHoisting)
4157 return;
4159 isl::union_map Writes = getWrites();
4160 for (ScopStmt &Stmt : *this) {
4161 InvariantAccessesTy InvariantAccesses;
// A non-null context from getNonHoistableCtx marks a hoistable access.
4163 for (MemoryAccess *Access : Stmt)
4164 if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
4165 InvariantAccesses.push_back({Access, NHCtx});
4167 // Transfer the memory access from the statement to the SCoP.
4168 for (auto InvMA : InvariantAccesses)
4169 Stmt.removeMemoryAccess(InvMA.MA);
4170 addInvariantLoads(Stmt, InvariantAccesses);
4174 /// Find the canonical scop array info object for a set of invariant load
4175 /// hoisted loads. The canonical array is the one that corresponds to the
4176 /// first load in the list of accesses which is used as base pointer of a
4177 /// scop array.
///
/// @returns The canonical array, or nullptr if none of the loads serves as
///          the base pointer of a MemoryKind::Array array.
4178 static const ScopArrayInfo *findCanonicalArray(Scop *S,
4179 MemoryAccessList &Accesses) {
4180 for (MemoryAccess *Access : Accesses) {
4181 const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
4182 Access->getAccessInstruction(), MemoryKind::Array);
4183 if (CanonicalArray)
4184 return CanonicalArray;
4186 return nullptr;
4189 /// Check if @p Array serves as base array in an invariant load.
4190 static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
4191 for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
4192 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
4193 if (Access2->getScopArrayInfo() == Array)
4194 return true;
4195 return false;
4198 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4199 /// with a reference to @p New.
4200 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
4201 const ScopArrayInfo *New) {
4202 for (ScopStmt &Stmt : *S)
4203 for (MemoryAccess *Access : Stmt) {
4204 if (Access->getLatestScopArrayInfo() != Old)
4205 continue;
// Redirect the access relation's output tuple to the new array's id.
4207 isl::id Id = New->getBasePtrId();
4208 isl::map Map = Access->getAccessRelation();
4209 Map = Map.set_tuple_id(isl::dim::out, Id);
4210 Access->setAccessRelation(Map);
/// For each invariant-load equivalence class, redirect all compatible arrays
/// whose base pointer is one of the hoisted loads to a single canonical
/// ScopArrayInfo object.
4214 void Scop::canonicalizeDynamicBasePtrs() {
4215 for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
4216 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
4218 const ScopArrayInfo *CanonicalBasePtrSAI =
4219 findCanonicalArray(this, BasePtrAccesses);
4221 if (!CanonicalBasePtrSAI)
4222 continue;
4224 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
4225 const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
4226 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
4227 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
4228 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
4229 continue;
4231 // we currently do not canonicalize arrays where some accesses are
4232 // hoisted as invariant loads. If we would, we need to update the access
4233 // function of the invariant loads as well. However, as this is not a
4234 // very common situation, we leave this for now to avoid further
4235 // complexity increases.
4236 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
4237 continue;
4239 replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
/// Return the ScopArrayInfo for (@p BasePtr, @p Kind) — or for @p BaseName
/// when no base pointer exists — creating it on first use. Exactly one of
/// BasePtr/BaseName must be set. On a size mismatch with an existing array
/// the SCoP is invalidated (DELINEARIZATION).
4244 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
4245 ArrayRef<const SCEV *> Sizes,
4246 MemoryKind Kind,
4247 const char *BaseName) {
4248 assert((BasePtr || BaseName) &&
4249 "BasePtr and BaseName can not be nullptr at the same time.");
4250 assert(!(BasePtr && BaseName) && "BaseName is redundant.");
4251 auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
4252 : ScopArrayNameMap[BaseName];
4253 if (!SAI) {
4254 auto &DL = getFunction().getParent()->getDataLayout();
4255 SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
4256 DL, this, BaseName));
4257 ScopArrayInfoSet.insert(SAI.get());
4258 } else {
4259 SAI->updateElementType(ElementType);
4260 // In case of mismatching array sizes, we bail out by setting the run-time
4261 // context to false.
4262 if (!SAI->updateSizes(Sizes))
4263 invalidate(DELINEARIZATION, DebugLoc());
4265 return SAI.get();
/// Create a named array with the given constant dimension sizes; a size of 0
/// is translated to a nullptr SCEV, i.e. an unknown dimension size.
4268 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
4269 const std::string &BaseName,
4270 const std::vector<unsigned> &Sizes) {
4271 auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
4272 std::vector<const SCEV *> SCEVSizes;
4274 for (auto size : Sizes)
4275 if (size)
4276 SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
4277 else
4278 SCEVSizes.push_back(nullptr);
4280 auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
4281 MemoryKind::Array, BaseName.c_str());
4282 return SAI;
/// Look up the array for (@p BasePtr, @p Kind); returns nullptr if unknown.
/// Note: operator[] default-inserts an empty slot for an unknown key.
4285 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
4286 MemoryKind Kind) {
4287 auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
4288 return SAI;
/// Look up the array for (@p BasePtr, @p Kind); asserts that it exists.
4291 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
4292 auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
4293 assert(SAI && "No ScopArrayInfo available for this base pointer");
4294 return SAI;
// Textual representations of the various context sets and the SCoP name;
// used for printing and debugging.
4297 std::string Scop::getContextStr() const { return getContext().to_str(); }
4299 std::string Scop::getAssumedContextStr() const {
4300 assert(AssumedContext && "Assumed context not yet built");
4301 return stringFromIslObj(AssumedContext);
4304 std::string Scop::getInvalidContextStr() const {
4305 return stringFromIslObj(InvalidContext);
/// The SCoP name is "<entry>---<exit>" built from the region boundaries.
4308 std::string Scop::getNameStr() const {
4309 std::string ExitName, EntryName;
4310 std::tie(EntryName, ExitName) = getEntryExitStr();
4311 return EntryName + "---" + ExitName;
/// Return printable names of the region's entry and exit blocks; a missing
/// exit (top-level region) is rendered as "FunctionExit".
4314 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4315 std::string ExitName, EntryName;
4316 raw_string_ostream ExitStr(ExitName);
4317 raw_string_ostream EntryStr(EntryName);
4319 R.getEntry()->printAsOperand(EntryStr, false);
4320 EntryStr.str();
4322 if (R.getExit()) {
4323 R.getExit()->printAsOperand(ExitStr, false);
4324 ExitStr.str();
4325 } else
4326 ExitName = "FunctionExit";
4328 return std::make_pair(EntryName, ExitName);
// Accessors returning (a copy of) the parameter context and its space.
4331 isl::set Scop::getContext() const { return isl::manage(isl_set_copy(Context)); }
4332 isl::space Scop::getParamSpace() const { return getContext().get_space(); }
/// Build a parameter space containing all SCEV parameters followed by the
/// Fortran array ids of this SCoP's arrays.
4334 isl::space Scop::getFullParamSpace() const {
4335 std::vector<isl::id> FortranIDs;
4336 FortranIDs = getFortranArrayIds(arrays());
4338 isl::space Space = isl::space::params_alloc(
4339 getIslCtx(), ParameterIds.size() + FortranIDs.size());
4341 unsigned PDim = 0;
4342 for (const SCEV *Parameter : Parameters) {
4343 isl::id Id = getIdForParam(Parameter);
4344 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4347 for (isl::id Id : FortranIDs)
4348 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4350 return Space;
/// Return a copy of the assumed context; only valid after it was built.
4353 isl::set Scop::getAssumedContext() const {
4354 assert(AssumedContext && "Assumed context not yet built");
4355 return isl::manage(isl_set_copy(AssumedContext));
/// Heuristically decide whether optimizing this SCoP is worthwhile: count
/// the loop iterators of statements considered optimizable and require more
/// than one. Always true under -polly-process-unprofitable.
4358 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4359 if (PollyProcessUnprofitable)
4360 return true;
4362 if (isEmpty())
4363 return false;
4365 unsigned OptimizableStmtsOrLoops = 0;
4366 for (auto &Stmt : *this) {
// Statements outside any loop cannot contribute loop optimizations.
4367 if (Stmt.getNumIterators() == 0)
4368 continue;
4370 bool ContainsArrayAccs = false;
4371 bool ContainsScalarAccs = false;
4372 for (auto *MA : Stmt) {
4373 if (MA->isRead())
4374 continue;
4375 ContainsArrayAccs |= MA->isLatestArrayKind();
4376 ContainsScalarAccs |= MA->isLatestScalarKind();
// When scalars count as unprofitable, only statements writing arrays but
// no scalars are counted.
4379 if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4380 OptimizableStmtsOrLoops += Stmt.getNumIterators();
4383 return OptimizableStmtsOrLoops > 1;
/// Check whether there is any parameter valuation for which the SCoP's
/// optimized version may legally execute: the assumed context must be
/// non-empty and neither it, the domain context, nor the context may be
/// fully contained in the invalid context.
4386 bool Scop::hasFeasibleRuntimeContext() const {
4387 auto *PositiveContext = getAssumedContext().release();
4388 auto *NegativeContext = getInvalidContext().release();
4389 PositiveContext =
4390 addNonEmptyDomainConstraints(isl::manage(PositiveContext)).release();
4391 bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
4392 isl_set_is_subset(PositiveContext, NegativeContext));
4393 isl_set_free(PositiveContext);
4394 if (!IsFeasible) {
4395 isl_set_free(NegativeContext);
4396 return false;
4399 auto *DomainContext = isl_union_set_params(getDomains().release());
4400 IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
4401 IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
4402 isl_set_free(NegativeContext);
4403 isl_set_free(DomainContext);
4405 return IsFeasible;
/// Human-readable name of an assumption kind, used in remark messages.
4408 static std::string toString(AssumptionKind Kind) {
4409 switch (Kind) {
4410 case ALIASING:
4411 return "No-aliasing";
4412 case INBOUNDS:
4413 return "Inbounds";
4414 case WRAPPING:
4415 return "No-overflows";
4416 case UNSIGNED:
4417 return "Signed-unsigned";
4418 case COMPLEXITY:
4419 return "Low complexity";
4420 case PROFITABLE:
4421 return "Profitable";
4422 case ERRORBLOCK:
4423 return "No-error";
4424 case INFINITELOOP:
4425 return "Finite loop";
4426 case INVARIANTLOAD:
4427 return "Invariant load";
4428 case DELINEARIZATION:
4429 return "Delinearization";
4431 llvm_unreachable("Unknown AssumptionKind!");
/// Check whether @p Set actually changes anything: an assumption is
/// ineffective if it is already implied by the (assumed) context, a
/// restriction if it is disjoint from the context or already contained in
/// the invalid context.
4434 bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
4435 if (Sign == AS_ASSUMPTION) {
4436 if (isl_set_is_subset(Context, Set))
4437 return false;
4439 if (isl_set_is_subset(AssumedContext, Set))
4440 return false;
4441 } else {
4442 if (isl_set_is_disjoint(Set, Context))
4443 return false;
4445 if (isl_set_is_subset(Set, InvalidContext))
4446 return false;
4448 return true;
/// Account for the assumption/restriction @p Set: bump the matching
/// statistic and emit an optimization remark. Returns false if the
/// assumption should be dropped (trivial, or ineffective under
/// -polly-remarks-minimal).
4451 bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
4452 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4453 if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
4454 return false;
4456 // Do never emit trivial assumptions as they only clutter the output.
4457 if (!PollyRemarksMinimal) {
4458 isl_set *Univ = nullptr;
4459 if (Sign == AS_ASSUMPTION)
4460 Univ = isl_set_universe(isl_set_get_space(Set));
4462 bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
4463 (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
4464 isl_set_free(Univ);
4466 if (IsTrivial)
4467 return false;
// Update the per-kind assumption statistics.
4470 switch (Kind) {
4471 case ALIASING:
4472 AssumptionsAliasing++;
4473 break;
4474 case INBOUNDS:
4475 AssumptionsInbounds++;
4476 break;
4477 case WRAPPING:
4478 AssumptionsWrapping++;
4479 break;
4480 case UNSIGNED:
4481 AssumptionsUnsigned++;
4482 break;
4483 case COMPLEXITY:
4484 AssumptionsComplexity++;
4485 break;
4486 case PROFITABLE:
4487 AssumptionsUnprofitable++;
4488 break;
4489 case ERRORBLOCK:
4490 AssumptionsErrorBlock++;
4491 break;
4492 case INFINITELOOP:
4493 AssumptionsInfiniteLoop++;
4494 break;
4495 case INVARIANTLOAD:
4496 AssumptionsInvariantLoad++;
4497 break;
4498 case DELINEARIZATION:
4499 AssumptionsDelinearization++;
4500 break;
// Emit the remark at @p BB when known, otherwise at the region entry.
4503 auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4504 std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
4505 if (BB)
4506 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
4507 << Msg);
4508 else
4509 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
4510 R.getEntry())
4511 << Msg);
4512 return true;
4515 void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4516 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4517 // Simplify the assumptions/restrictions first.
4518 Set = isl_set_gist_params(Set, getContext().release());
4520 if (!trackAssumption(Kind, Set, Loc, Sign, BB)) {
4521 isl_set_free(Set);
4522 return;
4525 if (Sign == AS_ASSUMPTION) {
4526 AssumedContext = isl_set_intersect(AssumedContext, Set);
4527 AssumedContext = isl_set_coalesce(AssumedContext);
4528 } else {
4529 InvalidContext = isl_set_union(InvalidContext, Set);
4530 InvalidContext = isl_set_coalesce(InvalidContext);
/// Queue an assumption to be added later by addRecordedAssumptions(), once
/// the domains of all statements are available. Takes ownership of @p Set.
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  assert((isl_set_is_params(Set) || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}

/// Flush all assumptions recorded by recordAssumption() into the contexts.
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    const Assumption &AS = RecordedAssumptions.pop_back_val();

    // Assumptions not tied to a basic block are parameter sets and can be
    // added directly.
    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB).release();
    if (!Dom) {
      isl_set_free(AS.Set);
      continue;
    }

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set;
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION, AS.BB);
  }
}

/// Mark the SCoP as not feasible by adding an empty assumed context.
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
  DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
  addAssumption(Kind, isl_set_empty(getParamSpace().release()), Loc,
                AS_ASSUMPTION, BB);
}

isl::set Scop::getInvalidContext() const {
  return isl::manage(isl_set_copy(InvalidContext));
}
4586 void Scop::printContext(raw_ostream &OS) const {
4587 OS << "Context:\n";
4588 OS.indent(4) << Context << "\n";
4590 OS.indent(4) << "Assumed Context:\n";
4591 OS.indent(4) << AssumedContext << "\n";
4593 OS.indent(4) << "Invalid Context:\n";
4594 OS.indent(4) << InvalidContext << "\n";
4596 unsigned Dim = 0;
4597 for (const SCEV *Parameter : Parameters)
4598 OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4601 void Scop::printAliasAssumptions(raw_ostream &OS) const {
4602 int noOfGroups = 0;
4603 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4604 if (Pair.second.size() == 0)
4605 noOfGroups += 1;
4606 else
4607 noOfGroups += Pair.second.size();
4610 OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4611 if (MinMaxAliasGroups.empty()) {
4612 OS.indent(8) << "n/a\n";
4613 return;
4616 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4618 // If the group has no read only accesses print the write accesses.
4619 if (Pair.second.empty()) {
4620 OS.indent(8) << "[[";
4621 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4622 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4623 << ">";
4625 OS << " ]]\n";
4628 for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4629 OS.indent(8) << "[[";
4630 OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4631 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4632 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4633 << ">";
4635 OS << " ]]\n";
4640 void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
4641 OS << "Statements {\n";
4643 for (const ScopStmt &Stmt : *this) {
4644 OS.indent(4);
4645 Stmt.print(OS, PrintInstructions);
4648 OS.indent(4) << "}\n";
4651 void Scop::printArrayInfo(raw_ostream &OS) const {
4652 OS << "Arrays {\n";
4654 for (auto &Array : arrays())
4655 Array->print(OS);
4657 OS.indent(4) << "}\n";
4659 OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4661 for (auto &Array : arrays())
4662 Array->print(OS, /* SizeAsPwAff */ true);
4664 OS.indent(4) << "}\n";
4667 void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
4668 OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4669 OS.indent(4) << "Region: " << getNameStr() << "\n";
4670 OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4671 OS.indent(4) << "Invariant Accesses: {\n";
4672 for (const auto &IAClass : InvariantEquivClasses) {
4673 const auto &MAs = IAClass.InvariantAccesses;
4674 if (MAs.empty()) {
4675 OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4676 } else {
4677 MAs.front()->print(OS);
4678 OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4679 << "\n";
4682 OS.indent(4) << "}\n";
4683 printContext(OS.indent(4));
4684 printArrayInfo(OS.indent(4));
4685 printAliasAssumptions(OS);
4686 printStatements(OS.indent(4), PrintInstructions);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this SCoP (with instructions) to the debug stream.
LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
#endif

isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }

__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
                                 bool NonNegative) {
  // First try to use the SCEVAffinator to generate a piecewise defined
  // affine function from @p E in the context of @p BB. If that tasks becomes to
  // complex the affinator might return a nullptr. In such a case we invalidate
  // the SCoP and return a dummy value. This way we do not need to add error
  // handling code to all users of this function.
  auto PWAC = Affinator.getPwAff(E, BB);
  if (PWAC.first) {
    // TODO: We could use a heuristic and either use:
    //         SCEVAffinator::takeNonNegativeAssumption
    //       or
    //         SCEVAffinator::interpretAsUnsigned
    //       to deal with unsigned or "NonNegative" SCEVs.
    if (NonNegative)
      Affinator.takeNonNegativeAssumption(PWAC);
    return PWAC;
  }

  // The affinator failed: invalidate the SCoP (COMPLEXITY) and return a
  // zero expression of the same type as a dummy value.
  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
  invalidate(COMPLEXITY, DL, BB);
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
4719 isl::union_set Scop::getDomains() const {
4720 isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
4721 isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4723 for (const ScopStmt &Stmt : *this)
4724 Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());
4726 return isl::manage(Domain);
4729 isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4730 PWACtx PWAC = getPwAff(E, BB);
4731 isl_set_free(PWAC.second);
4732 return isl::manage(PWAC.first);
4735 isl::union_map
4736 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4737 isl::union_map Accesses = isl::union_map::empty(getParamSpace());
4739 for (ScopStmt &Stmt : *this) {
4740 for (MemoryAccess *MA : Stmt) {
4741 if (!Predicate(*MA))
4742 continue;
4744 isl::set Domain = Stmt.getDomain();
4745 isl::map AccessDomain = MA->getAccessRelation();
4746 AccessDomain = AccessDomain.intersect_domain(Domain);
4747 Accesses = Accesses.add_map(AccessDomain);
4751 return Accesses.coalesce();
4754 isl::union_map Scop::getMustWrites() {
4755 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4758 isl::union_map Scop::getMayWrites() {
4759 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4762 isl::union_map Scop::getWrites() {
4763 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4766 isl::union_map Scop::getReads() {
4767 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4770 isl::union_map Scop::getAccesses() {
4771 return getAccessesOfType([](MemoryAccess &MA) { return true; });
4774 isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
4775 return getAccessesOfType(
4776 [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
// Check whether @p Node is an extension node.
//
// @return true if @p Node is an extension node.
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
    return isl_bool_error;
  else
    return isl_bool_true;
}

/// Return true iff @p Schedule contains an extension node. The traversal
/// aborts with isl_stat_error as soon as isNotExtNode sees one.
bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
  return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
                                                     nullptr) == isl_stat_error;
}

/// Return the schedule as a flat union map, or a null union_map if the
/// schedule tree contains extension nodes (which a flat map cannot express).
isl::union_map Scop::getSchedule() const {
  auto *Tree = getScheduleTree().release();
  if (containsExtensionNode(Tree)) {
    isl_schedule_free(Tree);
    return nullptr;
  }
  auto *S = isl_schedule_get_map(Tree);
  isl_schedule_free(Tree);
  return isl::manage(S);
}

/// Return a copy of the schedule tree with its domain restricted to the
/// current statement domains.
isl::schedule Scop::getScheduleTree() const {
  return isl::manage(isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
                                                   getDomains().release()));
}

/// Replace the schedule tree by one built from the flat map @p NewSchedule.
void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
  auto *S = isl_schedule_from_domain(getDomains().release());
  S = isl_schedule_insert_partial_schedule(
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
  isl_schedule_free(Schedule);
  Schedule = S;
}

/// Replace the schedule tree, taking ownership of @p NewSchedule.
void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
  isl_schedule_free(Schedule);
  Schedule = NewSchedule;
}
4823 bool Scop::restrictDomains(isl::union_set Domain) {
4824 bool Changed = false;
4825 for (ScopStmt &Stmt : *this) {
4826 isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
4827 isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);
4829 if (StmtDomain.is_subset(NewStmtDomain))
4830 continue;
4832 Changed = true;
4834 NewStmtDomain = NewStmtDomain.coalesce();
4836 if (NewStmtDomain.is_empty())
4837 Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
4838 else
4839 Stmt.restrictDomain(isl::set(NewStmtDomain));
4841 return Changed;
4844 ScalarEvolution *Scop::getSE() const { return SE; }
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  // For each set, project away all dimensions after N, then drop the
  // dimensions before N, leaving only the N-th dimension.
  auto Lambda = [&Result, N](isl::set S) -> isl::stat {
    int Dim = S.dim(isl::dim::set);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
    return isl::stat::ok;
  };

  isl::stat Res = USet.foreach_set(Lambda);
  (void)Res;

  assert(Res == isl::stat::ok);

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
4886 void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop,
4887 std::vector<Instruction *> Instructions, int Count) {
4888 assert(BB && "Unexpected nullptr!");
4889 Stmts.emplace_back(*this, *BB, SurroundingLoop, Instructions, Count);
4890 auto *Stmt = &Stmts.back();
4891 StmtMap[BB].push_back(Stmt);
4892 for (Instruction *Inst : Instructions) {
4893 assert(!InstStmtMap.count(Inst) &&
4894 "Unexpected statement corresponding to the instruction.");
4895 InstStmtMap[Inst] = Stmt;
4899 void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
4900 assert(R && "Unexpected nullptr!");
4901 Stmts.emplace_back(*this, *R, SurroundingLoop);
4902 auto *Stmt = &Stmts.back();
4903 for (BasicBlock *BB : R->blocks()) {
4904 StmtMap[BB].push_back(Stmt);
4905 for (Instruction &Inst : *BB) {
4906 assert(!InstStmtMap.count(&Inst) &&
4907 "Unexpected statement corresponding to the instruction.");
4908 InstStmtMap[&Inst] = Stmt;
4913 ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
4914 isl::set Domain) {
4915 #ifndef NDEBUG
4916 isl::set SourceDomain = SourceRel.domain();
4917 isl::set TargetDomain = TargetRel.domain();
4918 assert(Domain.is_subset(TargetDomain) &&
4919 "Target access not defined for complete statement domain");
4920 assert(Domain.is_subset(SourceDomain) &&
4921 "Source access not defined for complete statement domain");
4922 #endif
4923 Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4924 CopyStmtsNum++;
4925 return &(Stmts.back());
/// Build the schedule tree of the whole SCoP by traversing its region.
void Scop::buildSchedule(LoopInfo &LI) {
  Loop *L = getLoopSurroundingScop(*this, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(getRegion().getNode(), LoopStack, LI);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  Schedule = LoopStack[0].Schedule;
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///         /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing about any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queue and only traverse after the all nodes
/// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    // A node outside the SCoP's loops is scheduled in the surrounding loop.
    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        // The node belongs to a different loop nest; delay it until the
        // current loop is fully processed.
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}
5004 void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
5005 if (RN->isSubRegion()) {
5006 auto *LocalRegion = RN->getNodeAs<Region>();
5007 if (!isNonAffineSubRegion(LocalRegion)) {
5008 buildSchedule(LocalRegion, LoopStack, LI);
5009 return;
5013 auto &LoopData = LoopStack.back();
5014 LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);
5016 for (auto *Stmt : getStmtListFor(RN)) {
5017 auto *UDomain = isl_union_set_from_set(Stmt->getDomain().release());
5018 auto *StmtSchedule = isl_schedule_from_domain(UDomain);
5019 LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
5022 // Check if we just processed the last node in this loop. If we did, finalize
5023 // the loop by:
5025 // - adding new schedule dimensions
5026 // - folding the resulting schedule into the parent loop schedule
5027 // - dropping the loop schedule from the LoopStack.
5029 // Then continue to check surrounding loops, which might also have been
5030 // completed by this node.
5031 while (LoopData.L &&
5032 LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
5033 auto *Schedule = LoopData.Schedule;
5034 auto NumBlocksProcessed = LoopData.NumBlocksProcessed;
5036 LoopStack.pop_back();
5037 auto &NextLoopData = LoopStack.back();
5039 if (Schedule) {
5040 isl::union_set Domain = give(isl_schedule_get_domain(Schedule));
5041 isl::multi_union_pw_aff MUPA = mapToDimension(Domain, LoopStack.size());
5042 Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA.release());
5043 NextLoopData.Schedule =
5044 combineInSequence(NextLoopData.Schedule, Schedule);
5047 NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
5048 LoopData = NextLoopData;
5052 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
5053 auto StmtMapIt = StmtMap.find(BB);
5054 if (StmtMapIt == StmtMap.end())
5055 return {};
5056 return StmtMapIt->second;
5059 ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
5060 ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
5061 if (!StmtList.empty())
5062 return StmtList.back();
5063 return nullptr;
5066 ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
5067 if (RN->isSubRegion())
5068 return getStmtListFor(RN->getNodeAs<Region>());
5069 return getStmtListFor(RN->getNodeAs<BasicBlock>());
5072 ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
5073 return getStmtListFor(R->getEntry());
5076 int Scop::getRelativeLoopDepth(const Loop *L) const {
5077 if (!L || !R.contains(L))
5078 return -1;
5079 // outermostLoopInRegion always returns nullptr for top level regions
5080 if (R.isTopLevelRegion()) {
5081 // LoopInfo's depths start at 1, we start at 0
5082 return L->getLoopDepth() - 1;
5083 } else {
5084 Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
5085 assert(OuterLoop);
5086 return L->getLoopDepth() - OuterLoop->getLoopDepth();
5090 ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
5091 for (auto &SAI : arrays()) {
5092 if (SAI->getName() == BaseName)
5093 return SAI;
5095 return nullptr;
5098 void Scop::addAccessData(MemoryAccess *Access) {
5099 const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
5100 assert(SAI && "can only use after access relations have been constructed");
5102 if (Access->isOriginalValueKind() && Access->isRead())
5103 ValueUseAccs[SAI].push_back(Access);
5104 else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
5105 PHIIncomingAccs[SAI].push_back(Access);
5108 void Scop::removeAccessData(MemoryAccess *Access) {
5109 if (Access->isOriginalValueKind() && Access->isRead()) {
5110 auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
5111 std::remove(Uses.begin(), Uses.end(), Access);
5112 } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
5113 auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
5114 std::remove(Incomings.begin(), Incomings.end(), Access);
5118 MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
5119 assert(SAI->isValueKind());
5121 Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
5122 if (!Val)
5123 return nullptr;
5125 ScopStmt *Stmt = getStmtFor(Val);
5126 if (!Stmt)
5127 return nullptr;
5129 return Stmt->lookupValueWriteOf(Val);
5132 ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
5133 assert(SAI->isValueKind());
5134 auto It = ValueUseAccs.find(SAI);
5135 if (It == ValueUseAccs.end())
5136 return {};
5137 return It->second;
5140 MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
5141 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5143 if (SAI->isExitPHIKind())
5144 return nullptr;
5146 PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
5147 ScopStmt *Stmt = getStmtFor(PHI);
5148 assert(Stmt && "PHINode must be within the SCoP");
5150 return Stmt->lookupPHIReadOf(PHI);
5153 ArrayRef<MemoryAccess *> Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
5154 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5155 auto It = PHIIncomingAccs.find(SAI);
5156 if (It == PHIIncomingAccs.end())
5157 return {};
5158 return It->second;
5161 bool Scop::isEscaping(Instruction *Inst) {
5162 assert(contains(Inst) && "The concept of escaping makes only sense for "
5163 "values defined inside the SCoP");
5165 for (Use &Use : Inst->uses()) {
5166 BasicBlock *UserBB = getUseBlock(Use);
5167 if (!contains(UserBB))
5168 return true;
5170 // When the SCoP region exit needs to be simplified, PHIs in the region exit
5171 // move to a new basic block such that its incoming blocks are not in the
5172 // SCoP anymore.
5173 if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
5174 isExit(cast<PHINode>(Use.getUser())->getParent()))
5175 return true;
5177 return false;
/// Collect statistics about this SCoP (loop counts and categorized write
/// accesses). Only computed in builds with assertions or statistics enabled;
/// otherwise a default-initialized result is returned.
Scop::ScopStatistics Scop::getStatistics() const {
  ScopStatistics Result;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);

  int NumTotalLoops = LoopStat.NumLoops;
  Result.NumBoxedLoops = getBoxedLoops().size();
  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;

  for (const ScopStmt &Stmt : *this) {
    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
    bool IsInLoop = Stmt.getNumIterators() >= 1;
    for (MemoryAccess *MA : Stmt) {
      // Only write accesses are counted below.
      if (!MA->isWrite())
        continue;

      if (MA->isLatestValueKind()) {
        Result.NumValueWrites += 1;
        if (IsInLoop)
          Result.NumValueWritesInLoops += 1;
      }

      if (MA->isLatestAnyPHIKind()) {
        Result.NumPHIWrites += 1;
        if (IsInLoop)
          Result.NumPHIWritesInLoops += 1;
      }

      // A write is a singleton write if, within the statement domain, it
      // touches exactly one array element.
      isl::set AccSet =
          MA->getAccessRelation().intersect_domain(Domain).range();
      if (AccSet.is_singleton()) {
        Result.NumSingletonWrites += 1;
        if (IsInLoop)
          Result.NumSingletonWritesInLoops += 1;
      }
    }
  }
#endif
  return Result;
}
5221 raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
5222 scop.print(OS, PollyPrintInstructions);
5223 return OS;
5226 //===----------------------------------------------------------------------===//
5227 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
5228 AU.addRequired<LoopInfoWrapperPass>();
5229 AU.addRequired<RegionInfoPass>();
5230 AU.addRequired<DominatorTreeWrapperPass>();
5231 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5232 AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5233 AU.addRequired<AAResultsWrapperPass>();
5234 AU.addRequired<AssumptionCacheTracker>();
5235 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5236 AU.setPreservesAll();
5239 void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
5240 Scop::ScopStatistics ScopStats) {
5241 assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
5243 NumScops++;
5244 NumLoopsInScop += Stats.NumLoops;
5245 MaxNumLoopsInScop =
5246 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
5248 if (Stats.MaxDepth == 1)
5249 NumScopsDepthOne++;
5250 else if (Stats.MaxDepth == 2)
5251 NumScopsDepthTwo++;
5252 else if (Stats.MaxDepth == 3)
5253 NumScopsDepthThree++;
5254 else if (Stats.MaxDepth == 4)
5255 NumScopsDepthFour++;
5256 else if (Stats.MaxDepth == 5)
5257 NumScopsDepthFive++;
5258 else
5259 NumScopsDepthLarger++;
5261 NumAffineLoops += ScopStats.NumAffineLoops;
5262 NumBoxedLoops += ScopStats.NumBoxedLoops;
5264 NumValueWrites += ScopStats.NumValueWrites;
5265 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
5266 NumPHIWrites += ScopStats.NumPHIWrites;
5267 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
5268 NumSingletonWrites += ScopStats.NumSingletonWrites;
5269 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
/// Build the polyhedral description for the region @p R if ScopDetection
/// classified it as a maximal SCoP. Never modifies the IR.
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  S = SB.getScop(); // take ownership of scop object

#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  // Update the global statistics when a Scop was actually built.
  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
  }
#endif

  return false;
}
/// Print the Scop built for the pass's region, if any.
void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS, PollyPrintInstructions);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)
//===----------------------------------------------------------------------===//
/// Construct the ScopInfo result object and immediately build the Scops for
/// all valid regions of the function.
ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
                   LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
                   AssumptionCache &AC, OptimizationRemarkEmitter &ORE)
    : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
  recompute();
}

void ScopInfo::recompute() {
  RegionToScopMap.clear();
  /// Create polyhedral description of scops for all the valid regions of a
  /// function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
    std::unique_ptr<Scop> S = SB.getScop();
    // Scop construction may bail out (e.g. infeasible context); skip then.
    if (!S)
      continue;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
#endif
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}
5358 bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
5359 FunctionAnalysisManager::Invalidator &Inv) {
5360 // Check whether the analysis, all analyses on functions have been preserved
5361 // or anything we're holding references to is being invalidated
5362 auto PAC = PA.getChecker<ScopInfoAnalysis>();
5363 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
5364 Inv.invalidate<ScopAnalysis>(F, PA) ||
5365 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
5366 Inv.invalidate<LoopAnalysis>(F, PA) ||
5367 Inv.invalidate<AAManager>(F, PA) ||
5368 Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
5369 Inv.invalidate<AssumptionAnalysis>(F, PA);
AnalysisKey ScopInfoAnalysis::Key;

/// Gather all analyses ScopInfo depends on from the new pass manager and
/// construct the result (which builds all Scops of @p F).
ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  auto &DL = F.getParent()->getDataLayout();
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {DL, SD, SE, LI, AA, DT, AC, ORE};
}
5387 PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
5388 FunctionAnalysisManager &FAM) {
5389 auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
5390 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5391 // order here to keep the output persistent
5392 for (auto &It : reverse(SI)) {
5393 if (It.second)
5394 It.second->print(Stream, PollyPrintInstructions);
5395 else
5396 Stream << "Invalid Scop!\n";
5398 return PreservedAnalyses::all();
void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // Pure analysis: the IR is never modified.
  AU.setPreservesAll();
}

/// Build the ScopInfo (Scops for all valid regions) for function @p F.
bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
  return false;
}
/// Print all Scops built for this function, one entry per detected region.
void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : *Result) {
    if (It.second)
      It.second->print(OS, PollyPrintInstructions);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)