lib/Analysis/ScopBuilder.cpp

   1 //===- ScopBuilder.cpp ----------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Create a polyhedral description for a static control flow region.
  10 //
  11 // The pass creates a polyhedral description of the Scops detected by the SCoP
  12 // detection derived from their LLVM-IR code.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "polly/ScopBuilder.h"
  17 #include "polly/Options.h"
  18 #include "polly/ScopDetection.h"
  19 #include "polly/ScopInfo.h"
  20 #include "polly/Support/GICHelper.h"
  21 #include "polly/Support/ISLTools.h"
  22 #include "polly/Support/SCEVValidator.h"
  23 #include "polly/Support/ScopHelper.h"
  24 #include "polly/Support/VirtualInstruction.h"
  25 #include "llvm/ADT/ArrayRef.h"
  26 #include "llvm/ADT/EquivalenceClasses.h"
  27 #include "llvm/ADT/PostOrderIterator.h"
  28 #include "llvm/ADT/SmallSet.h"
  29 #include "llvm/ADT/Statistic.h"
  30 #include "llvm/Analysis/AliasAnalysis.h"
  31 #include "llvm/Analysis/AssumptionCache.h"
  32 #include "llvm/Analysis/Loads.h"
  33 #include "llvm/Analysis/LoopInfo.h"
  34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  35 #include "llvm/Analysis/RegionInfo.h"
  36 #include "llvm/Analysis/RegionIterator.h"
  37 #include "llvm/Analysis/ScalarEvolution.h"
  38 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  39 #include "llvm/IR/BasicBlock.h"
  40 #include "llvm/IR/DataLayout.h"
  41 #include "llvm/IR/DebugLoc.h"
  42 #include "llvm/IR/DerivedTypes.h"
  43 #include "llvm/IR/Dominators.h"
  44 #include "llvm/IR/Function.h"
  45 #include "llvm/IR/InstrTypes.h"
  46 #include "llvm/IR/Instruction.h"
  47 #include "llvm/IR/Instructions.h"
  48 #include "llvm/IR/Type.h"
  49 #include "llvm/IR/Use.h"
  50 #include "llvm/IR/Value.h"
  51 #include "llvm/Support/CommandLine.h"
  52 #include "llvm/Support/Compiler.h"
  53 #include "llvm/Support/Debug.h"
  54 #include "llvm/Support/ErrorHandling.h"
  55 #include "llvm/Support/raw_ostream.h"
  56 #include <cassert>
  57
  58 using namespace llvm;
  59 using namespace polly;
  60
  61 #define DEBUG_TYPE "polly-scops"
  62
  63 STATISTIC(ScopFound, "Number of valid Scops");
  64 STATISTIC(RichScopFound, "Number of Scops containing a loop");
  65 STATISTIC(InfeasibleScops,
  66           "Number of SCoPs with statically infeasible context.");
  67
  68 bool polly::ModelReadOnlyScalars;
  69
  70 // The maximal number of dimensions we allow during invariant load construction.
  71 // More complex access ranges will result in very high compile time and are also
  72 // unlikely to result in good code. This value is very high and should only
  73 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
  74 static int const MaxDimensionsInAccessRange = 9;
  75
  76 static cl::opt<bool, true> XModelReadOnlyScalars(
  77     "polly-analyze-read-only-scalars",
  78     cl::desc("Model read-only scalar values in the scop description"),
  79     cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
  80     cl::init(true), cl::cat(PollyCategory));
  81
  82 static cl::opt<int>
  83     OptComputeOut("polly-analysis-computeout",
  84                   cl::desc("Bound the scop analysis by a maximal amount of "
  85                            "computational steps (0 means no bound)"),
  86                   cl::Hidden, cl::init(800000), cl::ZeroOrMore,
  87                   cl::cat(PollyCategory));
  88
  89 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
  90     "polly-allow-dereference-of-all-function-parameters",
  91     cl::desc(
  92         "Treat all parameters to functions that are pointers as dereferencible."
  93         " This is useful for invariant load hoisting, since we can generate"
  94         " less runtime checks. This is only valid if all pointers to functions"
  95         " are always initialized, so that Polly can choose to hoist"
  96         " their loads. "),
  97     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  98
  99 static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
 100     "polly-rtc-max-arrays-per-group",
 101     cl::desc("The maximal number of arrays to compare in each alias group."),
 102     cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
 103
 104 static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
 105     "polly-rtc-max-array-disjuncts",
 106     cl::desc("The maximal number of disjunts allowed in memory accesses to "
 107              "to build RTCs."),
 108     cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
 109
 110 static cl::opt<unsigned> RunTimeChecksMaxParameters(
 111     "polly-rtc-max-parameters",
 112     cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
 113     cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
 114
 115 static cl::opt<bool> UnprofitableScalarAccs(
 116     "polly-unprofitable-scalar-accs",
 117     cl::desc("Count statements with scalar accesses as not optimizable"),
 118     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 119
 120 static cl::opt<std::string> UserContextStr(
 121     "polly-context", cl::value_desc("isl parameter set"),
 122     cl::desc("Provide additional constraints on the context parameters"),
 123     cl::init(""), cl::cat(PollyCategory));
 124
 125 static cl::opt<bool> DetectFortranArrays(
 126     "polly-detect-fortran-arrays",
 127     cl::desc("Detect Fortran arrays and use this for code generation"),
 128     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 129
 130 static cl::opt<bool> DetectReductions("polly-detect-reductions",
 131                                       cl::desc("Detect and exploit reductions"),
 132                                       cl::Hidden, cl::ZeroOrMore,
 133                                       cl::init(true), cl::cat(PollyCategory));
 134
 135 // Multiplicative reductions can be disabled separately as these kind of
 136 // operations can overflow easily. Additive reductions and bit operations
 137 // are in contrast pretty stable.
 138 static cl::opt<bool> DisableMultiplicativeReductions(
 139     "polly-disable-multiplicative-reductions",
 140     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
 141     cl::init(false), cl::cat(PollyCategory));
 142
 143 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
 144
 145 static cl::opt<GranularityChoice> StmtGranularity(
 146     "polly-stmt-granularity",
 147     cl::desc(
 148         "Algorithm to use for splitting basic blocks into multiple statements"),
 149     cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
 150                           "One statement per basic block"),
 151                clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
 152                           "Scalar independence heuristic"),
 153                clEnumValN(GranularityChoice::Stores, "store",
 154                           "Store-level granularity")),
 155     cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
 156
 157 /// Helper to treat non-affine regions and basic blocks the same.
 158 ///
 159 ///{
 160
 161 /// Return the block that is the representing block for @p RN.
 162 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
 163   return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
 164                            : RN->getNodeAs<BasicBlock>();
 165 }
 166
 167 /// Return the @p idx'th block that is executed after @p RN.
 168 static inline BasicBlock *
 169 getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
 170   if (RN->isSubRegion()) {
 171     assert(idx == 0);
 172     return RN->getNodeAs<Region>()->getExit();
 173   }
 174   return TI->getSuccessor(idx);
 175 }
 176
 177 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
 178                                const DominatorTree &DT) {
 179   if (!RN->isSubRegion())
 180     return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
 181   for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
 182     if (isErrorBlock(*BB, R, LI, DT))
 183       return true;
 184   return false;
 185 }
 186
 187 ///}
 188
 189 /// Create a map to map from a given iteration to a subsequent iteration.
 190 ///
 191 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
 192 /// is incremented by one and all other dimensions are equal, e.g.,
 193 ///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
 194 ///
 195 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
 196 static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
 197   isl::space MapSpace = SetSpace.map_from_set();
 198   isl::map NextIterationMap = isl::map::universe(MapSpace);
 199   for (unsigned u = 0; u < NextIterationMap.dim(isl::dim::in); u++)
 200     if (u != Dim)
 201       NextIterationMap =
 202           NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
 203   isl::constraint C =
 204       isl::constraint::alloc_equality(isl::local_space(MapSpace));
 205   C = C.set_constant_si(1);
 206   C = C.set_coefficient_si(isl::dim::in, Dim, 1);
 207   C = C.set_coefficient_si(isl::dim::out, Dim, -1);
 208   NextIterationMap = NextIterationMap.add_constraint(C);
 209   return NextIterationMap;
 210 }
 211
 212 /// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
 213 static isl::set collectBoundedParts(isl::set S) {
 214   isl::set BoundedParts = isl::set::empty(S.get_space());
 215   for (isl::basic_set BSet : S.get_basic_set_list())
 216     if (BSet.is_bounded())
 217       BoundedParts = BoundedParts.unite(isl::set(BSet));
 218   return BoundedParts;
 219 }
 220
 221 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
 222 ///
 223 /// @returns A separation of @p S into first an unbounded then a bounded subset,
 224 ///          both with regards to the dimension @p Dim.
 225 static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
 226                                                        unsigned Dim) {
 227   for (unsigned u = 0, e = S.n_dim(); u < e; u++)
 228     S = S.lower_bound_si(isl::dim::set, u, 0);
 229
 230   unsigned NumDimsS = S.n_dim();
 231   isl::set OnlyDimS = S;
 232
 233   // Remove dimensions that are greater than Dim as they are not interesting.
 234   assert(NumDimsS >= Dim + 1);
 235   OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
 236
 237   // Create artificial parametric upper bounds for dimensions smaller than Dim
 238   // as we are not interested in them.
 239   OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
 240
 241   for (unsigned u = 0; u < Dim; u++) {
 242     isl::constraint C = isl::constraint::alloc_inequality(
 243         isl::local_space(OnlyDimS.get_space()));
 244     C = C.set_coefficient_si(isl::dim::param, u, 1);
 245     C = C.set_coefficient_si(isl::dim::set, u, -1);
 246     OnlyDimS = OnlyDimS.add_constraint(C);
 247   }
 248
 249   // Collect all bounded parts of OnlyDimS.
 250   isl::set BoundedParts = collectBoundedParts(OnlyDimS);
 251
 252   // Create the dimensions greater than Dim again.
 253   BoundedParts =
 254       BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
 255
 256   // Remove the artificial upper bound parameters again.
 257   BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
 258
 259   isl::set UnboundedParts = S.subtract(BoundedParts);
 260   return std::make_pair(UnboundedParts, BoundedParts);
 261 }
 262
 263 /// Create the conditions under which @p L @p Pred @p R is true.
 264 static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
 265                                   isl::pw_aff R) {
 266   switch (Pred) {
 267   case ICmpInst::ICMP_EQ:
 268     return L.eq_set(R);
 269   case ICmpInst::ICMP_NE:
 270     return L.ne_set(R);
 271   case ICmpInst::ICMP_SLT:
 272     return L.lt_set(R);
 273   case ICmpInst::ICMP_SLE:
 274     return L.le_set(R);
 275   case ICmpInst::ICMP_SGT:
 276     return L.gt_set(R);
 277   case ICmpInst::ICMP_SGE:
 278     return L.ge_set(R);
 279   case ICmpInst::ICMP_ULT:
 280     return L.lt_set(R);
 281   case ICmpInst::ICMP_UGT:
 282     return L.gt_set(R);
 283   case ICmpInst::ICMP_ULE:
 284     return L.le_set(R);
 285   case ICmpInst::ICMP_UGE:
 286     return L.ge_set(R);
 287   default:
 288     llvm_unreachable("Non integer predicate not supported");
 289   }
 290 }
 291
 292 isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
 293                                              Loop *NewL) {
 294   // If the loops are the same there is nothing to do.
 295   if (NewL == OldL)
 296     return Dom;
 297
 298   int OldDepth = scop->getRelativeLoopDepth(OldL);
 299   int NewDepth = scop->getRelativeLoopDepth(NewL);
 300   // If both loops are non-affine loops there is nothing to do.
 301   if (OldDepth == -1 && NewDepth == -1)
 302     return Dom;
 303
 304   // Distinguish three cases:
 305   //   1) The depth is the same but the loops are not.
 306   //      => One loop was left one was entered.
 307   //   2) The depth increased from OldL to NewL.
 308   //      => One loop was entered, none was left.
 309   //   3) The depth decreased from OldL to NewL.
 310   //      => Loops were left were difference of the depths defines how many.
 311   if (OldDepth == NewDepth) {
 312     assert(OldL->getParentLoop() == NewL->getParentLoop());
 313     Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
 314     Dom = Dom.add_dims(isl::dim::set, 1);
 315   } else if (OldDepth < NewDepth) {
 316     assert(OldDepth + 1 == NewDepth);
 317     auto &R = scop->getRegion();
 318     (void)R;
 319     assert(NewL->getParentLoop() == OldL ||
 320            ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
 321     Dom = Dom.add_dims(isl::dim::set, 1);
 322   } else {
 323     assert(OldDepth > NewDepth);
 324     int Diff = OldDepth - NewDepth;
 325     int NumDim = Dom.n_dim();
 326     assert(NumDim >= Diff);
 327     Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
 328   }
 329
 330   return Dom;
 331 }
 332
 333 /// Compute the isl representation for the SCEV @p E in this BB.
 334 ///
 335 /// @param BB               The BB for which isl representation is to be
 336 /// computed.
 337 /// @param InvalidDomainMap A map of BB to their invalid domains.
 338 /// @param E                The SCEV that should be translated.
 339 /// @param NonNegative      Flag to indicate the @p E has to be non-negative.
 340 ///
 341 /// Note that this function will also adjust the invalid context accordingly.
 342
 343 __isl_give isl_pw_aff *
 344 ScopBuilder::getPwAff(BasicBlock *BB,
 345                       DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
 346                       const SCEV *E, bool NonNegative) {
 347   PWACtx PWAC = scop->getPwAff(E, BB, NonNegative);
 348   InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
 349   return PWAC.first.release();
 350 }
 351
 352 /// Build condition sets for unsigned ICmpInst(s).
 353 /// Special handling is required for unsigned operands to ensure that if
 354 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
 355 /// it should wrap around.
 356 ///
 357 /// @param IsStrictUpperBound holds information on the predicate relation
 358 /// between TestVal and UpperBound, i.e,
 359 /// TestVal < UpperBound  OR  TestVal <= UpperBound
 360 __isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
 361     BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
 362     const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
 363     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
 364     bool IsStrictUpperBound) {
 365   // Do not take NonNeg assumption on TestVal
 366   // as it might have MSB (Sign bit) set.
 367   isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
 368   // Take NonNeg assumption on UpperBound.
 369   isl_pw_aff *UpperBound =
 370       getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
 371
 372   // 0 <= TestVal
 373   isl_set *First =
 374       isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
 375                             isl_pw_aff_get_domain_space(TestVal))),
 376                         isl_pw_aff_copy(TestVal));
 377
 378   isl_set *Second;
 379   if (IsStrictUpperBound)
 380     // TestVal < UpperBound
 381     Second = isl_pw_aff_lt_set(TestVal, UpperBound);
 382   else
 383     // TestVal <= UpperBound
 384     Second = isl_pw_aff_le_set(TestVal, UpperBound);
 385
 386   isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
 387   return ConsequenceCondSet;
 388 }
 389
 390 bool ScopBuilder::buildConditionSets(
 391     BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
 392     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
 393     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
 394   Value *Condition = getConditionFromTerminator(SI);
 395   assert(Condition && "No condition for switch");
 396
 397   isl_pw_aff *LHS, *RHS;
 398   LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
 399
 400   unsigned NumSuccessors = SI->getNumSuccessors();
 401   ConditionSets.resize(NumSuccessors);
 402   for (auto &Case : SI->cases()) {
 403     unsigned Idx = Case.getSuccessorIndex();
 404     ConstantInt *CaseValue = Case.getCaseValue();
 405
 406     RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
 407     isl_set *CaseConditionSet =
 408         buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
 409                           isl::manage(RHS))
 410             .release();
 411     ConditionSets[Idx] = isl_set_coalesce(
 412         isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
 413   }
 414
 415   assert(ConditionSets[0] == nullptr && "Default condition set was set");
 416   isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
 417   for (unsigned u = 2; u < NumSuccessors; u++)
 418     ConditionSetUnion =
 419         isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
 420   ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
 421
 422   isl_pw_aff_free(LHS);
 423
 424   return true;
 425 }
 426
 427 bool ScopBuilder::buildConditionSets(
 428     BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
 429     __isl_keep isl_set *Domain,
 430     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
 431     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
 432   isl_set *ConsequenceCondSet = nullptr;
 433
 434   if (auto Load = dyn_cast<LoadInst>(Condition)) {
 435     const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
 436     const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
 437     bool NonNeg = false;
 438     isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
 439     isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
 440     ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
 441                                            isl::manage(RHS))
 442                              .release();
 443   } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
 444     auto *Unique = dyn_cast<ConstantInt>(
 445         getUniqueNonErrorValue(PHI, &scop->getRegion(), LI, DT));
 446
 447     if (Unique->isZero())
 448       ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
 449     else
 450       ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
 451   } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
 452     if (CCond->isZero())
 453       ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
 454     else
 455       ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
 456   } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
 457     auto Opcode = BinOp->getOpcode();
 458     assert(Opcode == Instruction::And || Opcode == Instruction::Or);
 459
 460     bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
 461                                     InvalidDomainMap, ConditionSets) &&
 462                  buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
 463                                     InvalidDomainMap, ConditionSets);
 464     if (!Valid) {
 465       while (!ConditionSets.empty())
 466         isl_set_free(ConditionSets.pop_back_val());
 467       return false;
 468     }
 469
 470     isl_set_free(ConditionSets.pop_back_val());
 471     isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
 472     isl_set_free(ConditionSets.pop_back_val());
 473     isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
 474
 475     if (Opcode == Instruction::And)
 476       ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
 477     else
 478       ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
 479   } else {
 480     auto *ICond = dyn_cast<ICmpInst>(Condition);
 481     assert(ICond &&
 482            "Condition of exiting branch was neither constant nor ICmp!");
 483
 484     Region &R = scop->getRegion();
 485
 486     isl_pw_aff *LHS, *RHS;
 487     // For unsigned comparisons we assumed the signed bit of neither operand
 488     // to be set. The comparison is equal to a signed comparison under this
 489     // assumption.
 490     bool NonNeg = ICond->isUnsigned();
 491     const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
 492                *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
 493
 494     LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
 495     RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);
 496
 497     switch (ICond->getPredicate()) {
 498     case ICmpInst::ICMP_ULT:
 499       ConsequenceCondSet =
 500           buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
 501                                      RightOperand, InvalidDomainMap, true);
 502       break;
 503     case ICmpInst::ICMP_ULE:
 504       ConsequenceCondSet =
 505           buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
 506                                      RightOperand, InvalidDomainMap, false);
 507       break;
 508     case ICmpInst::ICMP_UGT:
 509       ConsequenceCondSet =
 510           buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
 511                                      LeftOperand, InvalidDomainMap, true);
 512       break;
 513     case ICmpInst::ICMP_UGE:
 514       ConsequenceCondSet =
 515           buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
 516                                      LeftOperand, InvalidDomainMap, false);
 517       break;
 518     default:
 519       LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
 520       RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
 521       ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
 522                                              isl::manage(LHS), isl::manage(RHS))
 523                                .release();
 524       break;
 525     }
 526   }
 527
 528   // If no terminator was given we are only looking for parameter constraints
 529   // under which @p Condition is true/false.
 530   if (!TI)
 531     ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
 532   assert(ConsequenceCondSet);
 533   ConsequenceCondSet = isl_set_coalesce(
 534       isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
 535
 536   isl_set *AlternativeCondSet = nullptr;
 537   bool TooComplex =
 538       isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;
 539
 540   if (!TooComplex) {
 541     AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
 542                                           isl_set_copy(ConsequenceCondSet));
 543     TooComplex =
 544         isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
 545   }
 546
 547   if (TooComplex) {
 548     scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
 549                      TI ? TI->getParent() : nullptr /* BasicBlock */);
 550     isl_set_free(AlternativeCondSet);
 551     isl_set_free(ConsequenceCondSet);
 552     return false;
 553   }
 554
 555   ConditionSets.push_back(ConsequenceCondSet);
 556   ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
 557
 558   return true;
 559 }
 560
 561 bool ScopBuilder::buildConditionSets(
 562     BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
 563     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
 564     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
 565   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
 566     return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
 567                               ConditionSets);
 568
 569   assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
 570
 571   if (TI->getNumSuccessors() == 1) {
 572     ConditionSets.push_back(isl_set_copy(Domain));
 573     return true;
 574   }
 575
 576   Value *Condition = getConditionFromTerminator(TI);
 577   assert(Condition && "No condition for Terminator");
 578
 579   return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
 580                             ConditionSets);
 581 }
 582
 583 bool ScopBuilder::propagateDomainConstraints(
 584     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 585   // Iterate over the region R and propagate the domain constrains from the
 586   // predecessors to the current node. In contrast to the
 587   // buildDomainsWithBranchConstraints function, this one will pull the domain
 588   // information from the predecessors instead of pushing it to the successors.
 589   // Additionally, we assume the domains to be already present in the domain
 590   // map here. However, we iterate again in reverse post order so we know all
 591   // predecessors have been visited before a block or non-affine subregion is
 592   // visited.
 593
 594   ReversePostOrderTraversal<Region *> RTraversal(R);
 595   for (auto *RN : RTraversal) {
 596     // Recurse for affine subregions but go on for basic blocks and non-affine
 597     // subregions.
 598     if (RN->isSubRegion()) {
 599       Region *SubRegion = RN->getNodeAs<Region>();
 600       if (!scop->isNonAffineSubRegion(SubRegion)) {
 601         if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
 602           return false;
 603         continue;
 604       }
 605     }
 606
 607     BasicBlock *BB = getRegionNodeBasicBlock(RN);
 608     isl::set &Domain = scop->getOrInitEmptyDomain(BB);
 609     assert(Domain);
 610
 611     // Under the union of all predecessor conditions we can reach this block.
 612     isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
 613     Domain = Domain.intersect(PredDom).coalesce();
 614     Domain = Domain.align_params(scop->getParamSpace());
 615
 616     Loop *BBLoop = getRegionNodeLoop(RN, LI);
 617     if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
 618       if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
 619         return false;
 620   }
 621
 622   return true;
 623 }
 624
 625 void ScopBuilder::propagateDomainConstraintsToRegionExit(
 626     BasicBlock *BB, Loop *BBLoop,
 627     SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
 628     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 629   // Check if the block @p BB is the entry of a region. If so we propagate it's
 630   // domain to the exit block of the region. Otherwise we are done.
 631   auto *RI = scop->getRegion().getRegionInfo();
 632   auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
 633   auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
 634   if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
 635     return;
 636
 637   // Do not propagate the domain if there is a loop backedge inside the region
 638   // that would prevent the exit block from being executed.
 639   auto *L = BBLoop;
 640   while (L && scop->contains(L)) {
 641     SmallVector<BasicBlock *, 4> LatchBBs;
 642     BBLoop->getLoopLatches(LatchBBs);
 643     for (auto *LatchBB : LatchBBs)
 644       if (BB != LatchBB && BBReg->contains(LatchBB))
 645         return;
 646     L = L->getParentLoop();
 647   }
 648
 649   isl::set Domain = scop->getOrInitEmptyDomain(BB);
 650   assert(Domain && "Cannot propagate a nullptr");
 651
 652   Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
 653
 654   // Since the dimensions of @p BB and @p ExitBB might be different we have to
 655   // adjust the domain before we can propagate it.
 656   isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
 657   isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
 658
 659   // If the exit domain is not yet created we set it otherwise we "add" the
 660   // current domain.
 661   ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
 662
 663   // Initialize the invalid domain.
 664   InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
 665
 666   FinishedExitBlocks.insert(ExitBB);
 667 }
 668
 669 isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
 670                                                       isl::set Domain) {
 671   // If @p BB is the ScopEntry we are done
 672   if (scop->getRegion().getEntry() == BB)
 673     return isl::set::universe(Domain.get_space());
 674
 675   // The region info of this function.
 676   auto &RI = *scop->getRegion().getRegionInfo();
 677
 678   Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
 679
 680   // A domain to collect all predecessor domains, thus all conditions under
 681   // which the block is executed. To this end we start with the empty domain.
 682   isl::set PredDom = isl::set::empty(Domain.get_space());
 683
 684   // Set of regions of which the entry block domain has been propagated to BB.
 685   // all predecessors inside any of the regions can be skipped.
 686   SmallSet<Region *, 8> PropagatedRegions;
 687
 688   for (auto *PredBB : predecessors(BB)) {
 689     // Skip backedges.
 690     if (DT.dominates(BB, PredBB))
 691       continue;
 692
 693     // If the predecessor is in a region we used for propagation we can skip it.
 694     auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
 695     if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
 696                     PredBBInRegion)) {
 697       continue;
 698     }
 699
 700     // Check if there is a valid region we can use for propagation, thus look
 701     // for a region that contains the predecessor and has @p BB as exit block.
 702     auto *PredR = RI.getRegionFor(PredBB);
 703     while (PredR->getExit() != BB && !PredR->contains(BB))
 704       PredR->getParent();
 705
 706     // If a valid region for propagation was found use the entry of that region
 707     // for propagation, otherwise the PredBB directly.
 708     if (PredR->getExit() == BB) {
 709       PredBB = PredR->getEntry();
 710       PropagatedRegions.insert(PredR);
 711     }
 712
 713     isl::set PredBBDom = scop->getDomainConditions(PredBB);
 714     Loop *PredBBLoop =
 715         getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
 716     PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
 717     PredDom = PredDom.unite(PredBBDom);
 718   }
 719
 720   return PredDom;
 721 }
 722
 723 bool ScopBuilder::addLoopBoundsToHeaderDomain(
 724     Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 725   int LoopDepth = scop->getRelativeLoopDepth(L);
 726   assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
 727
 728   BasicBlock *HeaderBB = L->getHeader();
 729   assert(scop->isDomainDefined(HeaderBB));
 730   isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
 731
 732   isl::map NextIterationMap =
 733       createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
 734
 735   isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
 736
 737   SmallVector<BasicBlock *, 4> LatchBlocks;
 738   L->getLoopLatches(LatchBlocks);
 739
 740   for (BasicBlock *LatchBB : LatchBlocks) {
 741     // If the latch is only reachable via error statements we skip it.
 742     if (!scop->isDomainDefined(LatchBB))
 743       continue;
 744
 745     isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
 746
 747     isl::set BackedgeCondition = nullptr;
 748
 749     Instruction *TI = LatchBB->getTerminator();
 750     BranchInst *BI = dyn_cast<BranchInst>(TI);
 751     assert(BI && "Only branch instructions allowed in loop latches");
 752
 753     if (BI->isUnconditional())
 754       BackedgeCondition = LatchBBDom;
 755     else {
 756       SmallVector<isl_set *, 8> ConditionSets;
 757       int idx = BI->getSuccessor(0) != HeaderBB;
 758       if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
 759                               InvalidDomainMap, ConditionSets))
 760         return false;
 761
 762       // Free the non back edge condition set as we do not need it.
 763       isl_set_free(ConditionSets[1 - idx]);
 764
 765       BackedgeCondition = isl::manage(ConditionSets[idx]);
 766     }
 767
 768     int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
 769     assert(LatchLoopDepth >= LoopDepth);
 770     BackedgeCondition = BackedgeCondition.project_out(
 771         isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
 772     UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
 773   }
 774
 775   isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
 776   for (int i = 0; i < LoopDepth; i++)
 777     ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
 778
 779   isl::set UnionBackedgeConditionComplement =
 780       UnionBackedgeCondition.complement();
 781   UnionBackedgeConditionComplement =
 782       UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
 783                                                       0);
 784   UnionBackedgeConditionComplement =
 785       UnionBackedgeConditionComplement.apply(ForwardMap);
 786   HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
 787   HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
 788
 789   auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
 790   HeaderBBDom = Parts.second;
 791
 792   // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
 793   // the bounded assumptions to the context as they are already implied by the
 794   // <nsw> tag.
 795   if (scop->hasNSWAddRecForLoop(L))
 796     return true;
 797
 798   isl::set UnboundedCtx = Parts.first.params();
 799   scop->recordAssumption(INFINITELOOP, UnboundedCtx,
 800                          HeaderBB->getTerminator()->getDebugLoc(),
 801                          AS_RESTRICTION);
 802   return true;
 803 }
 804
 805 void ScopBuilder::buildInvariantEquivalenceClasses() {
 806   DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
 807
 808   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
 809   for (LoadInst *LInst : RIL) {
 810     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
 811
 812     Type *Ty = LInst->getType();
 813     LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
 814     if (ClassRep) {
 815       scop->addInvariantLoadMapping(LInst, ClassRep);
 816       continue;
 817     }
 818
 819     ClassRep = LInst;
 820     scop->addInvariantEquivClass(
 821         InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
 822   }
 823 }
 824
 825 bool ScopBuilder::buildDomains(
 826     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 827   bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
 828   auto *EntryBB = R->getEntry();
 829   auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
 830   int LD = scop->getRelativeLoopDepth(L);
 831   auto *S =
 832       isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
 833
 834   InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
 835   isl::noexceptions::set Domain = isl::manage(S);
 836   scop->setDomain(EntryBB, Domain);
 837
 838   if (IsOnlyNonAffineRegion)
 839     return !containsErrorBlock(R->getNode(), *R, LI, DT);
 840
 841   if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
 842     return false;
 843
 844   if (!propagateDomainConstraints(R, InvalidDomainMap))
 845     return false;
 846
 847   // Error blocks and blocks dominated by them have been assumed to never be
 848   // executed. Representing them in the Scop does not add any value. In fact,
 849   // it is likely to cause issues during construction of the ScopStmts. The
 850   // contents of error blocks have not been verified to be expressible and
 851   // will cause problems when building up a ScopStmt for them.
 852   // Furthermore, basic blocks dominated by error blocks may reference
 853   // instructions in the error block which, if the error block is not modeled,
 854   // can themselves not be constructed properly. To this end we will replace
 855   // the domains of error blocks and those only reachable via error blocks
 856   // with an empty set. Additionally, we will record for each block under which
 857   // parameter combination it would be reached via an error block in its
 858   // InvalidDomain. This information is needed during load hoisting.
 859   if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
 860     return false;
 861
 862   return true;
 863 }
 864
 865 bool ScopBuilder::buildDomainsWithBranchConstraints(
 866     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 867   // To create the domain for each block in R we iterate over all blocks and
 868   // subregions in R and propagate the conditions under which the current region
 869   // element is executed. To this end we iterate in reverse post order over R as
 870   // it ensures that we first visit all predecessors of a region node (either a
 871   // basic block or a subregion) before we visit the region node itself.
 872   // Initially, only the domain for the SCoP region entry block is set and from
 873   // there we propagate the current domain to all successors, however we add the
 874   // condition that the successor is actually executed next.
 875   // As we are only interested in non-loop carried constraints here we can
 876   // simply skip loop back edges.
 877
 878   SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
 879   ReversePostOrderTraversal<Region *> RTraversal(R);
 880   for (auto *RN : RTraversal) {
 881     // Recurse for affine subregions but go on for basic blocks and non-affine
 882     // subregions.
 883     if (RN->isSubRegion()) {
 884       Region *SubRegion = RN->getNodeAs<Region>();
 885       if (!scop->isNonAffineSubRegion(SubRegion)) {
 886         if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
 887           return false;
 888         continue;
 889       }
 890     }
 891
 892     if (containsErrorBlock(RN, scop->getRegion(), LI, DT))
 893       scop->notifyErrorBlock();
 894     ;
 895
 896     BasicBlock *BB = getRegionNodeBasicBlock(RN);
 897     Instruction *TI = BB->getTerminator();
 898
 899     if (isa<UnreachableInst>(TI))
 900       continue;
 901
 902     if (!scop->isDomainDefined(BB))
 903       continue;
 904     isl::set Domain = scop->getDomainConditions(BB);
 905
 906     scop->updateMaxLoopDepth(isl_set_n_dim(Domain.get()));
 907
 908     auto *BBLoop = getRegionNodeLoop(RN, LI);
 909     // Propagate the domain from BB directly to blocks that have a superset
 910     // domain, at the moment only region exit nodes of regions that start in BB.
 911     propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
 912                                            InvalidDomainMap);
 913
 914     // If all successors of BB have been set a domain through the propagation
 915     // above we do not need to build condition sets but can just skip this
 916     // block. However, it is important to note that this is a local property
 917     // with regards to the region @p R. To this end FinishedExitBlocks is a
 918     // local variable.
 919     auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
 920       return FinishedExitBlocks.count(SuccBB);
 921     };
 922     if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
 923       continue;
 924
 925     // Build the condition sets for the successor nodes of the current region
 926     // node. If it is a non-affine subregion we will always execute the single
 927     // exit node, hence the single entry node domain is the condition set. For
 928     // basic blocks we use the helper function buildConditionSets.
 929     SmallVector<isl_set *, 8> ConditionSets;
 930     if (RN->isSubRegion())
 931       ConditionSets.push_back(Domain.copy());
 932     else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
 933                                  ConditionSets))
 934       return false;
 935
 936     // Now iterate over the successors and set their initial domain based on
 937     // their condition set. We skip back edges here and have to be careful when
 938     // we leave a loop not to keep constraints over a dimension that doesn't
 939     // exist anymore.
 940     assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
 941     for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
 942       isl::set CondSet = isl::manage(ConditionSets[u]);
 943       BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
 944
 945       // Skip blocks outside the region.
 946       if (!scop->contains(SuccBB))
 947         continue;
 948
 949       // If we propagate the domain of some block to "SuccBB" we do not have to
 950       // adjust the domain.
 951       if (FinishedExitBlocks.count(SuccBB))
 952         continue;
 953
 954       // Skip back edges.
 955       if (DT.dominates(SuccBB, BB))
 956         continue;
 957
 958       Loop *SuccBBLoop =
 959           getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
 960
 961       CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
 962
 963       // Set the domain for the successor or merge it with an existing domain in
 964       // case there are multiple paths (without loop back edges) to the
 965       // successor block.
 966       isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
 967
 968       if (SuccDomain) {
 969         SuccDomain = SuccDomain.unite(CondSet).coalesce();
 970       } else {
 971         // Initialize the invalid domain.
 972         InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
 973         SuccDomain = CondSet;
 974       }
 975
 976       SuccDomain = SuccDomain.detect_equalities();
 977
 978       // Check if the maximal number of domain disjunctions was reached.
 979       // In case this happens we will clean up and bail.
 980       if (SuccDomain.n_basic_set() < MaxDisjunctsInDomain)
 981         continue;
 982
 983       scop->invalidate(COMPLEXITY, DebugLoc());
 984       while (++u < ConditionSets.size())
 985         isl_set_free(ConditionSets[u]);
 986       return false;
 987     }
 988   }
 989
 990   return true;
 991 }
 992
 993 bool ScopBuilder::propagateInvalidStmtDomains(
 994     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
 995   ReversePostOrderTraversal<Region *> RTraversal(R);
 996   for (auto *RN : RTraversal) {
 997
 998     // Recurse for affine subregions but go on for basic blocks and non-affine
 999     // subregions.
1000     if (RN->isSubRegion()) {
1001       Region *SubRegion = RN->getNodeAs<Region>();
1002       if (!scop->isNonAffineSubRegion(SubRegion)) {
1003         propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
1004         continue;
1005       }
1006     }
1007
1008     bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), LI, DT);
1009     BasicBlock *BB = getRegionNodeBasicBlock(RN);
1010     isl::set &Domain = scop->getOrInitEmptyDomain(BB);
1011     assert(Domain && "Cannot propagate a nullptr");
1012
1013     isl::set InvalidDomain = InvalidDomainMap[BB];
1014
1015     bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
1016
1017     if (!IsInvalidBlock) {
1018       InvalidDomain = InvalidDomain.intersect(Domain);
1019     } else {
1020       InvalidDomain = Domain;
1021       isl::set DomPar = Domain.params();
1022       scop->recordAssumption(ERRORBLOCK, DomPar,
1023                              BB->getTerminator()->getDebugLoc(),
1024                              AS_RESTRICTION);
1025       Domain = isl::set::empty(Domain.get_space());
1026     }
1027
1028     if (InvalidDomain.is_empty()) {
1029       InvalidDomainMap[BB] = InvalidDomain;
1030       continue;
1031     }
1032
1033     auto *BBLoop = getRegionNodeLoop(RN, LI);
1034     auto *TI = BB->getTerminator();
1035     unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1036     for (unsigned u = 0; u < NumSuccs; u++) {
1037       auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
1038
1039       // Skip successors outside the SCoP.
1040       if (!scop->contains(SuccBB))
1041         continue;
1042
1043       // Skip backedges.
1044       if (DT.dominates(SuccBB, BB))
1045         continue;
1046
1047       Loop *SuccBBLoop =
1048           getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
1049
1050       auto AdjustedInvalidDomain =
1051           adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
1052
1053       isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1054       SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
1055       SuccInvalidDomain = SuccInvalidDomain.coalesce();
1056
1057       InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1058
1059       // Check if the maximal number of domain disjunctions was reached.
1060       // In case this happens we will bail.
1061       if (SuccInvalidDomain.n_basic_set() < MaxDisjunctsInDomain)
1062         continue;
1063
1064       InvalidDomainMap.erase(BB);
1065       scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1066       return false;
1067     }
1068
1069     InvalidDomainMap[BB] = InvalidDomain;
1070   }
1071
1072   return true;
1073 }
1074
1075 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
1076                                    Region *NonAffineSubRegion,
1077                                    bool IsExitBlock) {
1078   // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
1079   // true, are not modeled as ordinary PHI nodes as they are not part of the
1080   // region. However, we model the operands in the predecessor blocks that are
1081   // part of the region as regular scalar accesses.
1082
1083   // If we can synthesize a PHI we can skip it, however only if it is in
1084   // the region. If it is not it can only be in the exit block of the region.
1085   // In this case we model the operands but not the PHI itself.
1086   auto *Scope = LI.getLoopFor(PHI->getParent());
1087   if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
1088     return;
1089
1090   // PHI nodes are modeled as if they had been demoted prior to the SCoP
1091   // detection. Hence, the PHI is a load of a new memory location in which the
1092   // incoming value was written at the end of the incoming basic block.
1093   bool OnlyNonAffineSubRegionOperands = true;
1094   for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
1095     Value *Op = PHI->getIncomingValue(u);
1096     BasicBlock *OpBB = PHI->getIncomingBlock(u);
1097     ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
1098
1099     // Do not build PHI dependences inside a non-affine subregion, but make
1100     // sure that the necessary scalar values are still made available.
1101     if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1102       auto *OpInst = dyn_cast<Instruction>(Op);
1103       if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1104         ensureValueRead(Op, OpStmt);
1105       continue;
1106     }
1107
1108     OnlyNonAffineSubRegionOperands = false;
1109     ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
1110   }
1111
1112   if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1113     addPHIReadAccess(PHIStmt, PHI);
1114   }
1115 }
1116
1117 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
1118                                          Instruction *Inst) {
1119   assert(!isa<PHINode>(Inst));
1120
1121   // Pull-in required operands.
1122   for (Use &Op : Inst->operands())
1123     ensureValueRead(Op.get(), UserStmt);
1124 }
1125
1126 // Create a sequence of two schedules. Either argument may be null and is
1127 // interpreted as the empty schedule. Can also return null if both schedules are
1128 // empty.
1129 static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
1130   if (!Prev)
1131     return Succ;
1132   if (!Succ)
1133     return Prev;
1134
1135   return Prev.sequence(Succ);
1136 }
1137
1138 // Create an isl_multi_union_aff that defines an identity mapping from the
1139 // elements of USet to their N-th dimension.
1140 //
1141 // # Example:
1142 //
1143 //            Domain: { A[i,j]; B[i,j,k] }
1144 //                 N: 1
1145 //
1146 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1147 //
1148 // @param USet   A union set describing the elements for which to generate a
1149 //               mapping.
1150 // @param N      The dimension to map to.
1151 // @returns      A mapping from USet to its N-th dimension.
1152 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
1153   assert(N >= 0);
1154   assert(USet);
1155   assert(!USet.is_empty());
1156
1157   auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
1158
1159   for (isl::set S : USet.get_set_list()) {
1160     int Dim = S.dim(isl::dim::set);
1161     auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
1162                                                   N, Dim - N);
1163     if (N > 1)
1164       PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
1165
1166     Result = Result.add_pw_multi_aff(PMA);
1167   }
1168
1169   return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
1170 }
1171
1172 void ScopBuilder::buildSchedule() {
1173   Loop *L = getLoopSurroundingScop(*scop, LI);
1174   LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
1175   buildSchedule(scop->getRegion().getNode(), LoopStack);
1176   assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1177   scop->setScheduleTree(LoopStack[0].Schedule);
1178 }
1179
1180 /// To generate a schedule for the elements in a Region we traverse the Region
1181 /// in reverse-post-order and add the contained RegionNodes in traversal order
1182 /// to the schedule of the loop that is currently at the top of the LoopStack.
1183 /// For loop-free codes, this results in a correct sequential ordering.
1184 ///
1185 /// Example:
1186 ///           bb1(0)
1187 ///         /     \.
1188 ///      bb2(1)   bb3(2)
1189 ///         \    /  \.
1190 ///          bb4(3)  bb5(4)
1191 ///             \   /
1192 ///              bb6(5)
1193 ///
1194 /// Including loops requires additional processing. Whenever a loop header is
1195 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
1196 /// from an empty schedule, we first process all RegionNodes that are within
1197 /// this loop and complete the sequential schedule at this loop-level before
1198 /// processing about any other nodes. To implement this
1199 /// loop-nodes-first-processing, the reverse post-order traversal is
1200 /// insufficient. Hence, we additionally check if the traversal yields
1201 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
1202 /// These region-nodes are then queue and only traverse after the all nodes
1203 /// within the current loop have been processed.
1204 void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1205   Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
1206
1207   ReversePostOrderTraversal<Region *> RTraversal(R);
1208   std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1209   std::deque<RegionNode *> DelayList;
1210   bool LastRNWaiting = false;
1211
1212   // Iterate over the region @p R in reverse post-order but queue
1213   // sub-regions/blocks iff they are not part of the last encountered but not
1214   // completely traversed loop. The variable LastRNWaiting is a flag to indicate
1215   // that we queued the last sub-region/block from the reverse post-order
1216   // iterator. If it is set we have to explore the next sub-region/block from
1217   // the iterator (if any) to guarantee progress. If it is not set we first try
1218   // the next queued sub-region/blocks.
1219   while (!WorkList.empty() || !DelayList.empty()) {
1220     RegionNode *RN;
1221
1222     if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1223       RN = WorkList.front();
1224       WorkList.pop_front();
1225       LastRNWaiting = false;
1226     } else {
1227       RN = DelayList.front();
1228       DelayList.pop_front();
1229     }
1230
1231     Loop *L = getRegionNodeLoop(RN, LI);
1232     if (!scop->contains(L))
1233       L = OuterScopLoop;
1234
1235     Loop *LastLoop = LoopStack.back().L;
1236     if (LastLoop != L) {
1237       if (LastLoop && !LastLoop->contains(L)) {
1238         LastRNWaiting = true;
1239         DelayList.push_back(RN);
1240         continue;
1241       }
1242       LoopStack.push_back({L, nullptr, 0});
1243     }
1244     buildSchedule(RN, LoopStack);
1245   }
1246 }
1247
1248 void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1249   if (RN->isSubRegion()) {
1250     auto *LocalRegion = RN->getNodeAs<Region>();
1251     if (!scop->isNonAffineSubRegion(LocalRegion)) {
1252       buildSchedule(LocalRegion, LoopStack);
1253       return;
1254     }
1255   }
1256
1257   assert(LoopStack.rbegin() != LoopStack.rend());
1258   auto LoopData = LoopStack.rbegin();
1259   LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1260
1261   for (auto *Stmt : scop->getStmtListFor(RN)) {
1262     isl::union_set UDomain{Stmt->getDomain()};
1263     auto StmtSchedule = isl::schedule::from_domain(UDomain);
1264     LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
1265   }
1266
1267   // Check if we just processed the last node in this loop. If we did, finalize
1268   // the loop by:
1269   //
1270   //   - adding new schedule dimensions
1271   //   - folding the resulting schedule into the parent loop schedule
1272   //   - dropping the loop schedule from the LoopStack.
1273   //
1274   // Then continue to check surrounding loops, which might also have been
1275   // completed by this node.
1276   size_t Dimension = LoopStack.size();
1277   while (LoopData->L &&
1278          LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
1279     isl::schedule Schedule = LoopData->Schedule;
1280     auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1281
1282     assert(std::next(LoopData) != LoopStack.rend());
1283     ++LoopData;
1284     --Dimension;
1285
1286     if (Schedule) {
1287       isl::union_set Domain = Schedule.get_domain();
1288       isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
1289       Schedule = Schedule.insert_partial_schedule(MUPA);
1290       LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
1291     }
1292
1293     LoopData->NumBlocksProcessed += NumBlocksProcessed;
1294   }
1295   // Now pop all loops processed up there from the LoopStack
1296   LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1297 }
1298
1299 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1300   // Check for uses of this instruction outside the scop. Because we do not
1301   // iterate over such instructions and therefore did not "ensure" the existence
1302   // of a write, we must determine such use here.
1303   if (scop->isEscaping(Inst))
1304     ensureValueWrite(Inst);
1305 }
1306
1307 /// Check that a value is a Fortran Array descriptor.
1308 ///
1309 /// We check if V has the following structure:
1310 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
1311 ///                                   [<num> x %struct.descriptor_dimension] }
1312 ///
1313 ///
1314 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
1315 ///
1316 /// 1. V's type name starts with "struct.array"
1317 /// 2. V's type has layout as shown.
1318 /// 3. Final member of V's type has name "struct.descriptor_dimension",
1319 /// 4. "struct.descriptor_dimension" has layout as shown.
1320 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
1321 ///
1322 /// We are interested in such types since this is the code that dragonegg
1323 /// generates for Fortran array descriptors.
1324 ///
1325 /// @param V the Value to be checked.
1326 ///
1327 /// @returns True if V is a Fortran array descriptor, False otherwise.
1328 bool isFortranArrayDescriptor(Value *V) {
1329   PointerType *PTy = dyn_cast<PointerType>(V->getType());
1330
1331   if (!PTy)
1332     return false;
1333
1334   Type *Ty = PTy->getElementType();
1335   assert(Ty && "Ty expected to be initialized");
1336   auto *StructArrTy = dyn_cast<StructType>(Ty);
1337
1338   if (!(StructArrTy && StructArrTy->hasName()))
1339     return false;
1340
1341   if (!StructArrTy->getName().startswith("struct.array"))
1342     return false;
1343
1344   if (StructArrTy->getNumElements() != 4)
1345     return false;
1346
1347   const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
1348
1349   // i8* match
1350   if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
1351     return false;
1352
1353   // Get a reference to the int type and check that all the members
1354   // share the same int type
1355   Type *IntTy = ArrMemberTys[1];
1356   if (ArrMemberTys[2] != IntTy)
1357     return false;
1358
1359   // type: [<num> x %struct.descriptor_dimension]
1360   ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
1361   if (!DescriptorDimArrayTy)
1362     return false;
1363
1364   // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
1365   StructType *DescriptorDimTy =
1366       dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
1367
1368   if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
1369     return false;
1370
1371   if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
1372     return false;
1373
1374   if (DescriptorDimTy->getNumElements() != 3)
1375     return false;
1376
1377   for (auto MemberTy : DescriptorDimTy->elements()) {
1378     if (MemberTy != IntTy)
1379       return false;
1380   }
1381
1382   return true;
1383 }
1384
1385 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
1386   // match: 4.1 & 4.2 store/load
1387   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
1388     return nullptr;
1389
1390   // match: 4
1391   if (Inst.getAlignment() != 8)
1392     return nullptr;
1393
1394   Value *Address = Inst.getPointerOperand();
1395
1396   const BitCastInst *Bitcast = nullptr;
1397   // [match: 3]
1398   if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
1399     Value *TypedMem = Slot->getPointerOperand();
1400     // match: 2
1401     Bitcast = dyn_cast<BitCastInst>(TypedMem);
1402   } else {
1403     // match: 2
1404     Bitcast = dyn_cast<BitCastInst>(Address);
1405   }
1406
1407   if (!Bitcast)
1408     return nullptr;
1409
1410   auto *MallocMem = Bitcast->getOperand(0);
1411
1412   // match: 1
1413   auto *MallocCall = dyn_cast<CallInst>(MallocMem);
1414   if (!MallocCall)
1415     return nullptr;
1416
1417   Function *MallocFn = MallocCall->getCalledFunction();
1418   if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
1419     return nullptr;
1420
1421   // Find all uses the malloc'd memory.
1422   // We are looking for a "store" into a struct with the type being the Fortran
1423   // descriptor type
1424   for (auto user : MallocMem->users()) {
1425     /// match: 5
1426     auto *MallocStore = dyn_cast<StoreInst>(user);
1427     if (!MallocStore)
1428       continue;
1429
1430     auto *DescriptorGEP =
1431         dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
1432     if (!DescriptorGEP)
1433       continue;
1434
1435     // match: 5
1436     auto DescriptorType =
1437         dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
1438     if (!(DescriptorType && DescriptorType->hasName()))
1439       continue;
1440
1441     Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
1442
1443     if (!Descriptor)
1444       continue;
1445
1446     if (!isFortranArrayDescriptor(Descriptor))
1447       continue;
1448
1449     return Descriptor;
1450   }
1451
1452   return nullptr;
1453 }
1454
1455 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
1456   // match: 3
1457   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
1458     return nullptr;
1459
1460   Value *Slot = Inst.getPointerOperand();
1461
1462   LoadInst *MemLoad = nullptr;
1463   // [match: 2]
1464   if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
1465     // match: 1
1466     MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
1467   } else {
1468     // match: 1
1469     MemLoad = dyn_cast<LoadInst>(Slot);
1470   }
1471
1472   if (!MemLoad)
1473     return nullptr;
1474
1475   auto *BitcastOperator =
1476       dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
1477   if (!BitcastOperator)
1478     return nullptr;
1479
1480   Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
1481   if (!Descriptor)
1482     return nullptr;
1483
1484   if (!isFortranArrayDescriptor(Descriptor))
1485     return nullptr;
1486
1487   return Descriptor;
1488 }
1489
1490 void ScopBuilder::addRecordedAssumptions() {
1491   for (auto &AS : llvm::reverse(scop->recorded_assumptions())) {
1492
1493     if (!AS.BB) {
1494       scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1495                           nullptr /* BasicBlock */);
1496       continue;
1497     }
1498
1499     // If the domain was deleted the assumptions are void.
1500     isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1501     if (!Dom)
1502       continue;
1503
1504     // If a basic block was given use its domain to simplify the assumption.
1505     // In case of restrictions we know they only have to hold on the domain,
1506     // thus we can intersect them with the domain of the block. However, for
1507     // assumptions the domain has to imply them, thus:
1508     //                     _              _____
1509     //   Dom => S   <==>   A v B   <==>   A - B
1510     //
1511     // To avoid the complement we will register A - B as a restriction not an
1512     // assumption.
1513     isl_set *S = AS.Set.copy();
1514     if (AS.Sign == AS_RESTRICTION)
1515       S = isl_set_params(isl_set_intersect(S, Dom));
1516     else /* (AS.Sign == AS_ASSUMPTION) */
1517       S = isl_set_params(isl_set_subtract(Dom, S));
1518
1519     scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
1520   }
1521   scop->clearRecordedAssumptions();
1522 }
1523
1524 void ScopBuilder::addUserAssumptions(
1525     AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1526   for (auto &Assumption : AC.assumptions()) {
1527     auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1528     if (!CI || CI->getNumArgOperands() != 1)
1529       continue;
1530
1531     bool InScop = scop->contains(CI);
1532     if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1533       continue;
1534
1535     auto *L = LI.getLoopFor(CI->getParent());
1536     auto *Val = CI->getArgOperand(0);
1537     ParameterSetTy DetectedParams;
1538     auto &R = scop->getRegion();
1539     if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1540       ORE.emit(
1541           OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1542           << "Non-affine user assumption ignored.");
1543       continue;
1544     }
1545
1546     // Collect all newly introduced parameters.
1547     ParameterSetTy NewParams;
1548     for (auto *Param : DetectedParams) {
1549       Param = extractConstantFactor(Param, SE).second;
1550       Param = scop->getRepresentingInvariantLoadSCEV(Param);
1551       if (scop->isParam(Param))
1552         continue;
1553       NewParams.insert(Param);
1554     }
1555
1556     SmallVector<isl_set *, 2> ConditionSets;
1557     auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1558     BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1559     auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1560                        : isl_set_copy(scop->getContext().get());
1561     assert(Dom && "Cannot propagate a nullptr.");
1562     bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1563                                     ConditionSets);
1564     isl_set_free(Dom);
1565
1566     if (!Valid)
1567       continue;
1568
1569     isl_set *AssumptionCtx = nullptr;
1570     if (InScop) {
1571       AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1572       isl_set_free(ConditionSets[0]);
1573     } else {
1574       AssumptionCtx = isl_set_complement(ConditionSets[1]);
1575       AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1576     }
1577
1578     // Project out newly introduced parameters as they are not otherwise useful.
1579     if (!NewParams.empty()) {
1580       for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1581         auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1582         auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1583         isl_id_free(Id);
1584
1585         if (!NewParams.count(Param))
1586           continue;
1587
1588         AssumptionCtx =
1589             isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1590       }
1591     }
1592     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1593              << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
1594     isl::set newContext =
1595         scop->getContext().intersect(isl::manage(AssumptionCtx));
1596     scop->setContext(newContext);
1597   }
1598 }
1599
1600 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1601   Value *Val = Inst.getValueOperand();
1602   Type *ElementType = Val->getType();
1603   Value *Address = Inst.getPointerOperand();
1604   const SCEV *AccessFunction =
1605       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1606   const SCEVUnknown *BasePointer =
1607       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1608   enum MemoryAccess::AccessType AccType =
1609       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1610
1611   if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
1612     auto *Src = BitCast->getOperand(0);
1613     auto *SrcTy = Src->getType();
1614     auto *DstTy = BitCast->getType();
1615     // Do not try to delinearize non-sized (opaque) pointers.
1616     if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
1617         (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
1618       return false;
1619     }
1620     if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
1621         DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
1622             DL.getTypeAllocSize(DstTy->getPointerElementType()))
1623       Address = Src;
1624   }
1625
1626   auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1627   if (!GEP)
1628     return false;
1629
1630   std::vector<const SCEV *> Subscripts;
1631   std::vector<int> Sizes;
1632   std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
1633   auto *BasePtr = GEP->getOperand(0);
1634
1635   if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1636     BasePtr = BasePtrCast->getOperand(0);
1637
1638   // Check for identical base pointers to ensure that we do not miss index
1639   // offsets that have been added before this GEP is applied.
1640   if (BasePtr != BasePointer->getValue())
1641     return false;
1642
1643   std::vector<const SCEV *> SizesSCEV;
1644
1645   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1646
1647   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1648   for (auto *Subscript : Subscripts) {
1649     InvariantLoadsSetTy AccessILS;
1650     if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1651                       &AccessILS))
1652       return false;
1653
1654     for (LoadInst *LInst : AccessILS)
1655       if (!ScopRIL.count(LInst))
1656         return false;
1657   }
1658
1659   if (Sizes.empty())
1660     return false;
1661
1662   SizesSCEV.push_back(nullptr);
1663
1664   for (auto V : Sizes)
1665     SizesSCEV.push_back(SE.getSCEV(
1666         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1667
1668   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1669                  true, Subscripts, SizesSCEV, Val);
1670   return true;
1671 }
1672
1673 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1674   if (!PollyDelinearize)
1675     return false;
1676
1677   Value *Address = Inst.getPointerOperand();
1678   Value *Val = Inst.getValueOperand();
1679   Type *ElementType = Val->getType();
1680   unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1681   enum MemoryAccess::AccessType AccType =
1682       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1683
1684   const SCEV *AccessFunction =
1685       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1686   const SCEVUnknown *BasePointer =
1687       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1688
1689   assert(BasePointer && "Could not find base pointer");
1690
1691   auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1692   auto AccItr = InsnToMemAcc.find(Inst);
1693   if (AccItr == InsnToMemAcc.end())
1694     return false;
1695
1696   std::vector<const SCEV *> Sizes = {nullptr};
1697
1698   Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1699                AccItr->second.Shape->DelinearizedSizes.end());
1700
1701   // In case only the element size is contained in the 'Sizes' array, the
1702   // access does not access a real multi-dimensional array. Hence, we allow
1703   // the normal single-dimensional access construction to handle this.
1704   if (Sizes.size() == 1)
1705     return false;
1706
1707   // Remove the element size. This information is already provided by the
1708   // ElementSize parameter. In case the element size of this access and the
1709   // element size used for delinearization differs the delinearization is
1710   // incorrect. Hence, we invalidate the scop.
1711   //
1712   // TODO: Handle delinearization with differing element sizes.
1713   auto DelinearizedSize =
1714       cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1715   Sizes.pop_back();
1716   if (ElementSize != DelinearizedSize)
1717     scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1718
1719   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1720                  true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1721   return true;
1722 }
1723
1724 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1725   auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1726
1727   if (MemIntr == nullptr)
1728     return false;
1729
1730   auto *L = LI.getLoopFor(Inst->getParent());
1731   auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1732   assert(LengthVal);
1733
1734   // Check if the length val is actually affine or if we overapproximate it
1735   InvariantLoadsSetTy AccessILS;
1736   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1737
1738   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1739   bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1740                                      LengthVal, SE, &AccessILS);
1741   for (LoadInst *LInst : AccessILS)
1742     if (!ScopRIL.count(LInst))
1743       LengthIsAffine = false;
1744   if (!LengthIsAffine)
1745     LengthVal = nullptr;
1746
1747   auto *DestPtrVal = MemIntr->getDest();
1748   assert(DestPtrVal);
1749
1750   auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1751   assert(DestAccFunc);
1752   // Ignore accesses to "NULL".
1753   // TODO: We could use this to optimize the region further, e.g., intersect
1754   //       the context with
1755   //          isl_set_complement(isl_set_params(getDomain()))
1756   //       as we know it would be undefined to execute this instruction anyway.
1757   if (DestAccFunc->isZero())
1758     return true;
1759
1760   auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1761   assert(DestPtrSCEV);
1762   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1763   addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1764                  IntegerType::getInt8Ty(DestPtrVal->getContext()),
1765                  LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1766                  Inst.getValueOperand());
1767
1768   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1769   if (!MemTrans)
1770     return true;
1771
1772   auto *SrcPtrVal = MemTrans->getSource();
1773   assert(SrcPtrVal);
1774
1775   auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1776   assert(SrcAccFunc);
1777   // Ignore accesses to "NULL".
1778   // TODO: See above TODO
1779   if (SrcAccFunc->isZero())
1780     return true;
1781
1782   auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1783   assert(SrcPtrSCEV);
1784   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1785   addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1786                  IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1787                  LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1788                  Inst.getValueOperand());
1789
1790   return true;
1791 }
1792
1793 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1794   auto *CI = dyn_cast_or_null<CallInst>(Inst);
1795
1796   if (CI == nullptr)
1797     return false;
1798
1799   if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1800     return true;
1801
1802   bool ReadOnly = false;
1803   auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1804   auto *CalledFunction = CI->getCalledFunction();
1805   switch (AA.getModRefBehavior(CalledFunction)) {
1806   case FMRB_UnknownModRefBehavior:
1807     llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
1808   case FMRB_DoesNotAccessMemory:
1809     return true;
1810   case FMRB_DoesNotReadMemory:
1811   case FMRB_OnlyAccessesInaccessibleMem:
1812   case FMRB_OnlyAccessesInaccessibleOrArgMem:
1813     return false;
1814   case FMRB_OnlyReadsMemory:
1815     GlobalReads.emplace_back(Stmt, CI);
1816     return true;
1817   case FMRB_OnlyReadsArgumentPointees:
1818     ReadOnly = true;
1819     LLVM_FALLTHROUGH;
1820   case FMRB_OnlyAccessesArgumentPointees: {
1821     auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1822     Loop *L = LI.getLoopFor(Inst->getParent());
1823     for (const auto &Arg : CI->arg_operands()) {
1824       if (!Arg->getType()->isPointerTy())
1825         continue;
1826
1827       auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1828       if (ArgSCEV->isZero())
1829         continue;
1830
1831       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1832       addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1833                      ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1834     }
1835     return true;
1836   }
1837   }
1838
1839   return true;
1840 }
1841
1842 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1843   Value *Address = Inst.getPointerOperand();
1844   Value *Val = Inst.getValueOperand();
1845   Type *ElementType = Val->getType();
1846   enum MemoryAccess::AccessType AccType =
1847       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1848
1849   const SCEV *AccessFunction =
1850       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1851   const SCEVUnknown *BasePointer =
1852       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1853
1854   assert(BasePointer && "Could not find base pointer");
1855   AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1856
1857   // Check if the access depends on a loop contained in a non-affine subregion.
1858   bool isVariantInNonAffineLoop = false;
1859   SetVector<const Loop *> Loops;
1860   findLoops(AccessFunction, Loops);
1861   for (const Loop *L : Loops)
1862     if (Stmt->contains(L)) {
1863       isVariantInNonAffineLoop = true;
1864       break;
1865     }
1866
1867   InvariantLoadsSetTy AccessILS;
1868
1869   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1870   bool IsAffine = !isVariantInNonAffineLoop &&
1871                   isAffineExpr(&scop->getRegion(), SurroundingLoop,
1872                                AccessFunction, SE, &AccessILS);
1873
1874   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1875   for (LoadInst *LInst : AccessILS)
1876     if (!ScopRIL.count(LInst))
1877       IsAffine = false;
1878
1879   if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1880     AccType = MemoryAccess::MAY_WRITE;
1881
1882   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1883                  IsAffine, {AccessFunction}, {nullptr}, Val);
1884 }
1885
1886 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1887   if (buildAccessMemIntrinsic(Inst, Stmt))
1888     return;
1889
1890   if (buildAccessCallInst(Inst, Stmt))
1891     return;
1892
1893   if (buildAccessMultiDimFixed(Inst, Stmt))
1894     return;
1895
1896   if (buildAccessMultiDimParam(Inst, Stmt))
1897     return;
1898
1899   buildAccessSingleDim(Inst, Stmt);
1900 }
1901
1902 void ScopBuilder::buildAccessFunctions() {
1903   for (auto &Stmt : *scop) {
1904     if (Stmt.isBlockStmt()) {
1905       buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1906       continue;
1907     }
1908
1909     Region *R = Stmt.getRegion();
1910     for (BasicBlock *BB : R->blocks())
1911       buildAccessFunctions(&Stmt, *BB, R);
1912   }
1913
1914   // Build write accesses for values that are used after the SCoP.
1915   // The instructions defining them might be synthesizable and therefore not
1916   // contained in any statement, hence we iterate over the original instructions
1917   // to identify all escaping values.
1918   for (BasicBlock *BB : scop->getRegion().blocks()) {
1919     for (Instruction &Inst : *BB)
1920       buildEscapingDependences(&Inst);
1921   }
1922 }
1923
1924 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1925   return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1926          !canSynthesize(Inst, *scop, &SE, L);
1927 }
1928
1929 /// Generate a name for a statement.
1930 ///
1931 /// @param BB     The basic block the statement will represent.
1932 /// @param BBIdx  The index of the @p BB relative to other BBs/regions.
1933 /// @param Count  The index of the created statement in @p BB.
1934 /// @param IsMain Whether this is the main of all statement for @p BB. If true,
1935 ///               no suffix will be added.
1936 /// @param IsLast Uses a special indicator for the last statement of a BB.
1937 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1938                                 bool IsMain, bool IsLast = false) {
1939   std::string Suffix;
1940   if (!IsMain) {
1941     if (UseInstructionNames)
1942       Suffix = '_';
1943     if (IsLast)
1944       Suffix += "last";
1945     else if (Count < 26)
1946       Suffix += 'a' + Count;
1947     else
1948       Suffix += std::to_string(Count);
1949   }
1950   return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1951 }
1952
1953 /// Generate a name for a statement that represents a non-affine subregion.
1954 ///
1955 /// @param R    The region the statement will represent.
1956 /// @param RIdx The index of the @p R relative to other BBs/regions.
1957 static std::string makeStmtName(Region *R, long RIdx) {
1958   return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1959                               UseInstructionNames);
1960 }
1961
1962 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1963   Loop *SurroundingLoop = LI.getLoopFor(BB);
1964
1965   int Count = 0;
1966   long BBIdx = scop->getNextStmtIdx();
1967   std::vector<Instruction *> Instructions;
1968   for (Instruction &Inst : *BB) {
1969     if (shouldModelInst(&Inst, SurroundingLoop))
1970       Instructions.push_back(&Inst);
1971     if (Inst.getMetadata("polly_split_after") ||
1972         (SplitOnStore && isa<StoreInst>(Inst))) {
1973       std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1974       scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1975       Count++;
1976       Instructions.clear();
1977     }
1978   }
1979
1980   std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1981   scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1982 }
1983
1984 /// Is @p Inst an ordered instruction?
1985 ///
1986 /// An unordered instruction is an instruction, such that a sequence of
1987 /// unordered instructions can be permuted without changing semantics. Any
1988 /// instruction for which this is not always the case is ordered.
1989 static bool isOrderedInstruction(Instruction *Inst) {
1990   return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1991 }
1992
1993 /// Join instructions to the same statement if one uses the scalar result of the
1994 /// other.
1995 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1996                             ArrayRef<Instruction *> ModeledInsts) {
1997   for (Instruction *Inst : ModeledInsts) {
1998     if (isa<PHINode>(Inst))
1999       continue;
2000
2001     for (Use &Op : Inst->operands()) {
2002       Instruction *OpInst = dyn_cast<Instruction>(Op.get());
2003       if (!OpInst)
2004         continue;
2005
2006       // Check if OpInst is in the BB and is a modeled instruction.
2007       auto OpVal = UnionFind.findValue(OpInst);
2008       if (OpVal == UnionFind.end())
2009         continue;
2010
2011       UnionFind.unionSets(Inst, OpInst);
2012     }
2013   }
2014 }
2015
2016 /// Ensure that the order of ordered instructions does not change.
2017 ///
2018 /// If we encounter an ordered instruction enclosed in instructions belonging to
2019 /// a different statement (which might as well contain ordered instructions, but
2020 /// this is not tested here), join them.
2021 static void
2022 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
2023                         ArrayRef<Instruction *> ModeledInsts) {
2024   SetVector<Instruction *> SeenLeaders;
2025   for (Instruction *Inst : ModeledInsts) {
2026     if (!isOrderedInstruction(Inst))
2027       continue;
2028
2029     Instruction *Leader = UnionFind.getLeaderValue(Inst);
2030     // Since previous iterations might have merged sets, some items in
2031     // SeenLeaders are not leaders anymore. However, The new leader of
2032     // previously merged instructions must be one of the former leaders of
2033     // these merged instructions.
2034     bool Inserted = SeenLeaders.insert(Leader);
2035     if (Inserted)
2036       continue;
2037
2038     // Merge statements to close holes. Say, we have already seen statements A
2039     // and B, in this order. Then we see an instruction of A again and we would
2040     // see the pattern "A B A". This function joins all statements until the
2041     // only seen occurrence of A.
2042     for (Instruction *Prev : reverse(SeenLeaders)) {
2043       // We are backtracking from the last element until we see Inst's leader
2044       // in SeenLeaders and merge all into one set. Although leaders of
2045       // instructions change during the execution of this loop, it's irrelevant
2046       // as we are just searching for the element that we already confirmed is
2047       // in the list.
2048       if (Prev == Leader)
2049         break;
2050       UnionFind.unionSets(Prev, Leader);
2051     }
2052   }
2053 }
2054
2055 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
2056 /// the incoming values from this block are executed after the PHI READ.
2057 ///
2058 /// Otherwise it could overwrite the incoming value from before the BB with the
2059 /// value for the next execution. This can happen if the PHI WRITE is added to
2060 /// the statement with the instruction that defines the incoming value (instead
2061 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
2062 /// are in order, we put both into the statement. PHI WRITEs are always executed
2063 /// after PHI READs when they are in the same statement.
2064 ///
2065 /// TODO: This is an overpessimization. We only have to ensure that the PHI
2066 /// WRITE is not put into a statement containing the PHI itself. That could also
2067 /// be done by
2068 /// - having all (strongly connected) PHIs in a single statement,
2069 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
2070 ///   has a chance of being lifted before a PHI by being in a statement with a
2071 ///   PHI that comes before in the basic block), or
2072 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
2073 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
2074                             ArrayRef<Instruction *> ModeledInsts) {
2075   for (Instruction *Inst : ModeledInsts) {
2076     PHINode *PHI = dyn_cast<PHINode>(Inst);
2077     if (!PHI)
2078       continue;
2079
2080     int Idx = PHI->getBasicBlockIndex(PHI->getParent());
2081     if (Idx < 0)
2082       continue;
2083
2084     Instruction *IncomingVal =
2085         dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
2086     if (!IncomingVal)
2087       continue;
2088
2089     UnionFind.unionSets(PHI, IncomingVal);
2090   }
2091 }
2092
2093 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
2094   Loop *L = LI.getLoopFor(BB);
2095
2096   // Extracting out modeled instructions saves us from checking
2097   // shouldModelInst() repeatedly.
2098   SmallVector<Instruction *, 32> ModeledInsts;
2099   EquivalenceClasses<Instruction *> UnionFind;
2100   Instruction *MainInst = nullptr, *MainLeader = nullptr;
2101   for (Instruction &Inst : *BB) {
2102     if (!shouldModelInst(&Inst, L))
2103       continue;
2104     ModeledInsts.push_back(&Inst);
2105     UnionFind.insert(&Inst);
2106
2107     // When a BB is split into multiple statements, the main statement is the
2108     // one containing the 'main' instruction. We select the first instruction
2109     // that is unlikely to be removed (because it has side-effects) as the main
2110     // one. It is used to ensure that at least one statement from the bb has the
2111     // same name as with -polly-stmt-granularity=bb.
2112     if (!MainInst && (isa<StoreInst>(Inst) ||
2113                       (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
2114       MainInst = &Inst;
2115   }
2116
2117   joinOperandTree(UnionFind, ModeledInsts);
2118   joinOrderedInstructions(UnionFind, ModeledInsts);
2119   joinOrderedPHIs(UnionFind, ModeledInsts);
2120
2121   // The list of instructions for statement (statement represented by the leader
2122   // instruction).
2123   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
2124
2125   // The order of statements must be preserved w.r.t. their ordered
2126   // instructions. Without this explicit scan, we would also use non-ordered
2127   // instructions (whose order is arbitrary) to determine statement order.
2128   for (Instruction &Inst : *BB) {
2129     if (!isOrderedInstruction(&Inst))
2130       continue;
2131
2132     auto LeaderIt = UnionFind.findLeader(&Inst);
2133     if (LeaderIt == UnionFind.member_end())
2134       continue;
2135
2136     // Insert element for the leader instruction.
2137     (void)LeaderToInstList[*LeaderIt];
2138   }
2139
2140   // Collect the instructions of all leaders. UnionFind's member iterator
2141   // unfortunately are not in any specific order.
2142   for (Instruction &Inst : *BB) {
2143     auto LeaderIt = UnionFind.findLeader(&Inst);
2144     if (LeaderIt == UnionFind.member_end())
2145       continue;
2146
2147     if (&Inst == MainInst)
2148       MainLeader = *LeaderIt;
2149     std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2150     InstList.push_back(&Inst);
2151   }
2152
2153   // Finally build the statements.
2154   int Count = 0;
2155   long BBIdx = scop->getNextStmtIdx();
2156   for (auto &Instructions : LeaderToInstList) {
2157     std::vector<Instruction *> &InstList = Instructions.second;
2158
2159     // If there is no main instruction, make the first statement the main.
2160     bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2161
2162     std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2163     scop->addScopStmt(BB, Name, L, std::move(InstList));
2164     Count += 1;
2165   }
2166
2167   // Unconditionally add an epilogue (last statement). It contains no
2168   // instructions, but holds the PHI write accesses for successor basic blocks,
2169   // if the incoming value is not defined in another statement if the same BB.
2170   // The epilogue becomes the main statement only if there is no other
2171   // statement that could become main.
2172   // The epilogue will be removed if no PHIWrite is added to it.
2173   std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2174   scop->addScopStmt(BB, EpilogueName, L, {});
2175 }
2176
2177 void ScopBuilder::buildStmts(Region &SR) {
2178   if (scop->isNonAffineSubRegion(&SR)) {
2179     std::vector<Instruction *> Instructions;
2180     Loop *SurroundingLoop =
2181         getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2182     for (Instruction &Inst : *SR.getEntry())
2183       if (shouldModelInst(&Inst, SurroundingLoop))
2184         Instructions.push_back(&Inst);
2185     long RIdx = scop->getNextStmtIdx();
2186     std::string Name = makeStmtName(&SR, RIdx);
2187     scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2188     return;
2189   }
2190
2191   for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2192     if (I->isSubRegion())
2193       buildStmts(*I->getNodeAs<Region>());
2194     else {
2195       BasicBlock *BB = I->getNodeAs<BasicBlock>();
2196       switch (StmtGranularity) {
2197       case GranularityChoice::BasicBlocks:
2198         buildSequentialBlockStmts(BB);
2199         break;
2200       case GranularityChoice::ScalarIndependence:
2201         buildEqivClassBlockStmts(BB);
2202         break;
2203       case GranularityChoice::Stores:
2204         buildSequentialBlockStmts(BB, true);
2205         break;
2206       }
2207     }
2208 }
2209
2210 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2211                                        Region *NonAffineSubRegion) {
2212   assert(
2213       Stmt &&
2214       "The exit BB is the only one that cannot be represented by a statement");
2215   assert(Stmt->represents(&BB));
2216
2217   // We do not build access functions for error blocks, as they may contain
2218   // instructions we can not model.
2219   if (isErrorBlock(BB, scop->getRegion(), LI, DT))
2220     return;
2221
2222   auto BuildAccessesForInst = [this, Stmt,
2223                                NonAffineSubRegion](Instruction *Inst) {
2224     PHINode *PHI = dyn_cast<PHINode>(Inst);
2225     if (PHI)
2226       buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2227
2228     if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2229       assert(Stmt && "Cannot build access function in non-existing statement");
2230       buildMemoryAccess(MemInst, Stmt);
2231     }
2232
2233     // PHI nodes have already been modeled above and terminators that are
2234     // not part of a non-affine subregion are fully modeled and regenerated
2235     // from the polyhedral domains. Hence, they do not need to be modeled as
2236     // explicit data dependences.
2237     if (!PHI)
2238       buildScalarDependences(Stmt, Inst);
2239   };
2240
2241   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2242   bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2243   if (IsEntryBlock) {
2244     for (Instruction *Inst : Stmt->getInstructions())
2245       BuildAccessesForInst(Inst);
2246     if (Stmt->isRegionStmt())
2247       BuildAccessesForInst(BB.getTerminator());
2248   } else {
2249     for (Instruction &Inst : BB) {
2250       if (isIgnoredIntrinsic(&Inst))
2251         continue;
2252
2253       // Invariant loads already have been processed.
2254       if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2255         continue;
2256
2257       BuildAccessesForInst(&Inst);
2258     }
2259   }
2260 }
2261
2262 MemoryAccess *ScopBuilder::addMemoryAccess(
2263     ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2264     Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2265     ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2266     MemoryKind Kind) {
2267   bool isKnownMustAccess = false;
2268
2269   // Accesses in single-basic block statements are always executed.
2270   if (Stmt->isBlockStmt())
2271     isKnownMustAccess = true;
2272
2273   if (Stmt->isRegionStmt()) {
2274     // Accesses that dominate the exit block of a non-affine region are always
2275     // executed. In non-affine regions there may exist MemoryKind::Values that
2276     // do not dominate the exit. MemoryKind::Values will always dominate the
2277     // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2278     // non-affine region.
2279     if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2280       isKnownMustAccess = true;
2281   }
2282
2283   // Non-affine PHI writes do not "happen" at a particular instruction, but
2284   // after exiting the statement. Therefore they are guaranteed to execute and
2285   // overwrite the old value.
2286   if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2287     isKnownMustAccess = true;
2288
2289   if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2290     AccType = MemoryAccess::MAY_WRITE;
2291
2292   auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2293                                   Affine, Subscripts, Sizes, AccessValue, Kind);
2294
2295   scop->addAccessFunction(Access);
2296   Stmt->addAccess(Access);
2297   return Access;
2298 }
2299
2300 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2301                                  MemoryAccess::AccessType AccType,
2302                                  Value *BaseAddress, Type *ElementType,
2303                                  bool IsAffine,
2304                                  ArrayRef<const SCEV *> Subscripts,
2305                                  ArrayRef<const SCEV *> Sizes,
2306                                  Value *AccessValue) {
2307   ArrayBasePointers.insert(BaseAddress);
2308   auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
2309                                     ElementType, IsAffine, AccessValue,
2310                                     Subscripts, Sizes, MemoryKind::Array);
2311
2312   if (!DetectFortranArrays)
2313     return;
2314
2315   if (Value *FAD = findFADAllocationInvisible(MemAccInst))
2316     MemAccess->setFortranArrayDescriptor(FAD);
2317   else if (Value *FAD = findFADAllocationVisible(MemAccInst))
2318     MemAccess->setFortranArrayDescriptor(FAD);
2319 }
2320
2321 /// Check if @p Expr is divisible by @p Size.
2322 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2323   assert(Size != 0);
2324   if (Size == 1)
2325     return true;
2326
2327   // Only one factor needs to be divisible.
2328   if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2329     for (auto *FactorExpr : MulExpr->operands())
2330       if (isDivisible(FactorExpr, Size, SE))
2331         return true;
2332     return false;
2333   }
2334
2335   // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2336   // to be divisible.
2337   if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2338     for (auto *OpExpr : NAryExpr->operands())
2339       if (!isDivisible(OpExpr, Size, SE))
2340         return false;
2341     return true;
2342   }
2343
2344   auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2345   auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2346   auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2347   return MulSCEV == Expr;
2348 }
2349
2350 void ScopBuilder::foldSizeConstantsToRight() {
2351   isl::union_set Accessed = scop->getAccesses().range();
2352
2353   for (auto Array : scop->arrays()) {
2354     if (Array->getNumberOfDimensions() <= 1)
2355       continue;
2356
2357     isl::space Space = Array->getSpace();
2358     Space = Space.align_params(Accessed.get_space());
2359
2360     if (!Accessed.contains(Space))
2361       continue;
2362
2363     isl::set Elements = Accessed.extract_set(Space);
2364     isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2365
2366     std::vector<int> Int;
2367     int Dims = Elements.dim(isl::dim::set);
2368     for (int i = 0; i < Dims; i++) {
2369       isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2370       DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2371       DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2372
2373       isl::basic_set DimHull = DimOnly.affine_hull();
2374
2375       if (i == Dims - 1) {
2376         Int.push_back(1);
2377         Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2378         continue;
2379       }
2380
2381       if (DimHull.dim(isl::dim::div) == 1) {
2382         isl::aff Diff = DimHull.get_div(0);
2383         isl::val Val = Diff.get_denominator_val();
2384
2385         int ValInt = 1;
2386         if (Val.is_int()) {
2387           auto ValAPInt = APIntFromVal(Val);
2388           if (ValAPInt.isSignedIntN(32))
2389             ValInt = ValAPInt.getSExtValue();
2390         } else {
2391         }
2392
2393         Int.push_back(ValInt);
2394         isl::constraint C = isl::constraint::alloc_equality(
2395             isl::local_space(Transform.get_space()));
2396         C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2397         C = C.set_coefficient_si(isl::dim::in, i, -1);
2398         Transform = Transform.add_constraint(C);
2399         continue;
2400       }
2401
2402       isl::basic_set ZeroSet = isl::basic_set(DimHull);
2403       ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2404
2405       int ValInt = 1;
2406       if (ZeroSet.is_equal(DimHull)) {
2407         ValInt = 0;
2408       }
2409
2410       Int.push_back(ValInt);
2411       Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2412     }
2413
2414     isl::set MappedElements = isl::map(Transform).domain();
2415     if (!Elements.is_subset(MappedElements))
2416       continue;
2417
2418     bool CanFold = true;
2419     if (Int[0] <= 1)
2420       CanFold = false;
2421
2422     unsigned NumDims = Array->getNumberOfDimensions();
2423     for (unsigned i = 1; i < NumDims - 1; i++)
2424       if (Int[0] != Int[i] && Int[i])
2425         CanFold = false;
2426
2427     if (!CanFold)
2428       continue;
2429
2430     for (auto &Access : scop->access_functions())
2431       if (Access->getScopArrayInfo() == Array)
2432         Access->setAccessRelation(
2433             Access->getAccessRelation().apply_range(Transform));
2434
2435     std::vector<const SCEV *> Sizes;
2436     for (unsigned i = 0; i < NumDims; i++) {
2437       auto Size = Array->getDimensionSize(i);
2438
2439       if (i == NumDims - 1)
2440         Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2441       Sizes.push_back(Size);
2442     }
2443
2444     Array->updateSizes(Sizes, false /* CheckConsistency */);
2445   }
2446 }
2447
2448 void ScopBuilder::markFortranArrays() {
2449   for (ScopStmt &Stmt : *scop) {
2450     for (MemoryAccess *MemAcc : Stmt) {
2451       Value *FAD = MemAcc->getFortranArrayDescriptor();
2452       if (!FAD)
2453         continue;
2454
2455       // TODO: const_cast-ing to edit
2456       ScopArrayInfo *SAI =
2457           const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
2458       assert(SAI && "memory access into a Fortran array does not "
2459                     "have an associated ScopArrayInfo");
2460       SAI->applyAndSetFAD(FAD);
2461     }
2462   }
2463 }
2464
2465 void ScopBuilder::finalizeAccesses() {
2466   updateAccessDimensionality();
2467   foldSizeConstantsToRight();
2468   foldAccessRelations();
2469   assumeNoOutOfBounds();
2470   markFortranArrays();
2471 }
2472
2473 void ScopBuilder::updateAccessDimensionality() {
2474   // Check all array accesses for each base pointer and find a (virtual) element
2475   // size for the base pointer that divides all access functions.
2476   for (ScopStmt &Stmt : *scop)
2477     for (MemoryAccess *Access : Stmt) {
2478       if (!Access->isArrayKind())
2479         continue;
2480       ScopArrayInfo *Array =
2481           const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2482
2483       if (Array->getNumberOfDimensions() != 1)
2484         continue;
2485       unsigned DivisibleSize = Array->getElemSizeInBytes();
2486       const SCEV *Subscript = Access->getSubscript(0);
2487       while (!isDivisible(Subscript, DivisibleSize, SE))
2488         DivisibleSize /= 2;
2489       auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2490       Array->updateElementType(Ty);
2491     }
2492
2493   for (auto &Stmt : *scop)
2494     for (auto &Access : Stmt)
2495       Access->updateDimensionality();
2496 }
2497
2498 void ScopBuilder::foldAccessRelations() {
2499   for (auto &Stmt : *scop)
2500     for (auto &Access : Stmt)
2501       Access->foldAccessRelation();
2502 }
2503
2504 void ScopBuilder::assumeNoOutOfBounds() {
2505   for (auto &Stmt : *scop)
2506     for (auto &Access : Stmt)
2507       Access->assumeNoOutOfBound();
2508 }
2509
2510 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2511   // Find the statement that defines the value of Inst. That statement has to
2512   // write the value to make it available to those statements that read it.
2513   ScopStmt *Stmt = scop->getStmtFor(Inst);
2514
2515   // It is possible that the value is synthesizable within a loop (such that it
2516   // is not part of any statement), but not after the loop (where you need the
2517   // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2518   // avoid this. In case the IR has no such PHI, use the last statement (where
2519   // the value is synthesizable) to write the value.
2520   if (!Stmt)
2521     Stmt = scop->getLastStmtFor(Inst->getParent());
2522
2523   // Inst not defined within this SCoP.
2524   if (!Stmt)
2525     return;
2526
2527   // Do not process further if the instruction is already written.
2528   if (Stmt->lookupValueWriteOf(Inst))
2529     return;
2530
2531   addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2532                   true, Inst, ArrayRef<const SCEV *>(),
2533                   ArrayRef<const SCEV *>(), MemoryKind::Value);
2534 }
2535
2536 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2537   // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2538   // to be able to replace this one. Currently, there is a split responsibility.
2539   // In a first step, the MemoryAccess is created, but without the
2540   // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2541   // AccessRelation is created. At least for scalar accesses, there is no new
2542   // information available at ScopStmt::buildAccessRelations(), so we could
2543   // create the AccessRelation right away. This is what
2544   // ScopStmt::ensureValueRead(Value*) does.
2545
2546   auto *Scope = UserStmt->getSurroundingLoop();
2547   auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2548   switch (VUse.getKind()) {
2549   case VirtualUse::Constant:
2550   case VirtualUse::Block:
2551   case VirtualUse::Synthesizable:
2552   case VirtualUse::Hoisted:
2553   case VirtualUse::Intra:
2554     // Uses of these kinds do not need a MemoryAccess.
2555     break;
2556
2557   case VirtualUse::ReadOnly:
2558     // Add MemoryAccess for invariant values only if requested.
2559     if (!ModelReadOnlyScalars)
2560       break;
2561
2562     LLVM_FALLTHROUGH;
2563   case VirtualUse::Inter:
2564
2565     // Do not create another MemoryAccess for reloading the value if one already
2566     // exists.
2567     if (UserStmt->lookupValueReadOf(V))
2568       break;
2569
2570     addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2571                     true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2572                     MemoryKind::Value);
2573
2574     // Inter-statement uses need to write the value in their defining statement.
2575     if (VUse.isInter())
2576       ensureValueWrite(cast<Instruction>(V));
2577     break;
2578   }
2579 }
2580
2581 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2582                                  BasicBlock *IncomingBlock,
2583                                  Value *IncomingValue, bool IsExitBlock) {
2584   // As the incoming block might turn out to be an error statement ensure we
2585   // will create an exit PHI SAI object. It is needed during code generation
2586   // and would be created later anyway.
2587   if (IsExitBlock)
2588     scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2589                                    MemoryKind::ExitPHI);
2590
2591   // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2592   // from outside the SCoP's region have no statement representation.
2593   if (!IncomingStmt)
2594     return;
2595
2596   // Take care for the incoming value being available in the incoming block.
2597   // This must be done before the check for multiple PHI writes because multiple
2598   // exiting edges from subregion each can be the effective written value of the
2599   // subregion. As such, all of them must be made available in the subregion
2600   // statement.
2601   ensureValueRead(IncomingValue, IncomingStmt);
2602
2603   // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2604   if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2605     assert(Acc->getAccessInstruction() == PHI);
2606     Acc->addIncoming(IncomingBlock, IncomingValue);
2607     return;
2608   }
2609
2610   MemoryAccess *Acc = addMemoryAccess(
2611       IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2612       PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2613       IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2614   assert(Acc);
2615   Acc->addIncoming(IncomingBlock, IncomingValue);
2616 }
2617
2618 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2619   addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2620                   PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2621                   MemoryKind::PHI);
2622 }
2623
2624 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2625   isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2626
2627   Stmt.Domain = scop->getDomainConditions(&Stmt);
2628   Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2629 }
2630
2631 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2632   isl::set Domain = Stmt.getDomain();
2633   BasicBlock *BB = Stmt.getEntryBlock();
2634
2635   Loop *L = LI.getLoopFor(BB);
2636
2637   while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2638     L = L->getParentLoop();
2639
2640   SmallVector<llvm::Loop *, 8> Loops;
2641
2642   while (L && Stmt.getParent()->getRegion().contains(L)) {
2643     Loops.push_back(L);
2644     L = L->getParentLoop();
2645   }
2646
2647   Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2648 }
2649
2650 /// Return the reduction type for a given binary operator.
2651 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2652                                                     const Instruction *Load) {
2653   if (!BinOp)
2654     return MemoryAccess::RT_NONE;
2655   switch (BinOp->getOpcode()) {
2656   case Instruction::FAdd:
2657     if (!BinOp->isFast())
2658       return MemoryAccess::RT_NONE;
2659     LLVM_FALLTHROUGH;
2660   case Instruction::Add:
2661     return MemoryAccess::RT_ADD;
2662   case Instruction::Or:
2663     return MemoryAccess::RT_BOR;
2664   case Instruction::Xor:
2665     return MemoryAccess::RT_BXOR;
2666   case Instruction::And:
2667     return MemoryAccess::RT_BAND;
2668   case Instruction::FMul:
2669     if (!BinOp->isFast())
2670       return MemoryAccess::RT_NONE;
2671     LLVM_FALLTHROUGH;
2672   case Instruction::Mul:
2673     if (DisableMultiplicativeReductions)
2674       return MemoryAccess::RT_NONE;
2675     return MemoryAccess::RT_MUL;
2676   default:
2677     return MemoryAccess::RT_NONE;
2678   }
2679 }
2680
2681 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2682   SmallVector<MemoryAccess *, 2> Loads;
2683   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2684
2685   // First collect candidate load-store reduction chains by iterating over all
2686   // stores and collecting possible reduction loads.
2687   for (MemoryAccess *StoreMA : Stmt) {
2688     if (StoreMA->isRead())
2689       continue;
2690
2691     Loads.clear();
2692     collectCandidateReductionLoads(StoreMA, Loads);
2693     for (MemoryAccess *LoadMA : Loads)
2694       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2695   }
2696
2697   // Then check each possible candidate pair.
2698   for (const auto &CandidatePair : Candidates) {
2699     bool Valid = true;
2700     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2701     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2702
2703     // Skip those with obviously unequal base addresses.
2704     if (!LoadAccs.has_equal_space(StoreAccs)) {
2705       continue;
2706     }
2707
2708     // And check if the remaining for overlap with other memory accesses.
2709     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2710     AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2711     isl::set AllAccs = AllAccsRel.range();
2712
2713     for (MemoryAccess *MA : Stmt) {
2714       if (MA == CandidatePair.first || MA == CandidatePair.second)
2715         continue;
2716
2717       isl::map AccRel =
2718           MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2719       isl::set Accs = AccRel.range();
2720
2721       if (AllAccs.has_equal_space(Accs)) {
2722         isl::set OverlapAccs = Accs.intersect(AllAccs);
2723         Valid = Valid && OverlapAccs.is_empty();
2724       }
2725     }
2726
2727     if (!Valid)
2728       continue;
2729
2730     const LoadInst *Load =
2731         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2732     MemoryAccess::ReductionType RT =
2733         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2734
2735     // If no overlapping access was found we mark the load and store as
2736     // reduction like.
2737     CandidatePair.first->markAsReductionLike(RT);
2738     CandidatePair.second->markAsReductionLike(RT);
2739   }
2740 }
2741
2742 void ScopBuilder::verifyInvariantLoads() {
2743   auto &RIL = scop->getRequiredInvariantLoads();
2744   for (LoadInst *LI : RIL) {
2745     assert(LI && scop->contains(LI));
2746     // If there exists a statement in the scop which has a memory access for
2747     // @p LI, then mark this scop as infeasible for optimization.
2748     for (ScopStmt &Stmt : *scop)
2749       if (Stmt.getArrayAccessOrNULLFor(LI)) {
2750         scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2751         return;
2752       }
2753   }
2754 }
2755
2756 void ScopBuilder::hoistInvariantLoads() {
2757   if (!PollyInvariantLoadHoisting)
2758     return;
2759
2760   isl::union_map Writes = scop->getWrites();
2761   for (ScopStmt &Stmt : *scop) {
2762     InvariantAccessesTy InvariantAccesses;
2763
2764     for (MemoryAccess *Access : Stmt)
2765       if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
2766         InvariantAccesses.push_back({Access, NHCtx});
2767
2768     // Transfer the memory access from the statement to the SCoP.
2769     for (auto InvMA : InvariantAccesses)
2770       Stmt.removeMemoryAccess(InvMA.MA);
2771     addInvariantLoads(Stmt, InvariantAccesses);
2772   }
2773 }
2774
2775 /// Check if an access range is too complex.
2776 ///
2777 /// An access range is too complex, if it contains either many disjuncts or
2778 /// very complex expressions. As a simple heuristic, we assume if a set to
2779 /// be too complex if the sum of existentially quantified dimensions and
2780 /// set dimensions is larger than a threshold. This reliably detects both
2781 /// sets with many disjuncts as well as sets with many divisions as they
2782 /// arise in h264.
2783 ///
2784 /// @param AccessRange The range to check for complexity.
2785 ///
2786 /// @returns True if the access range is too complex.
2787 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2788   int NumTotalDims = 0;
2789
2790   for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2791     NumTotalDims += BSet.dim(isl::dim::div);
2792     NumTotalDims += BSet.dim(isl::dim::set);
2793   }
2794
2795   if (NumTotalDims > MaxDimensionsInAccessRange)
2796     return true;
2797
2798   return false;
2799 }
2800
2801 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2802                                                isl::union_map Writes) {
2803   if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2804     return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2805   }
2806
2807   Value *BaseAddr = MA->getOriginalBaseAddr();
2808   if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2809     if (!isa<LoadInst>(BasePtrInst))
2810       return scop->contains(BasePtrInst);
2811
2812   return false;
2813 }
2814
2815 void ScopBuilder::addUserContext() {
2816   if (UserContextStr.empty())
2817     return;
2818
2819   isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2820   isl::space Space = scop->getParamSpace();
2821   if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
2822     std::string SpaceStr = Space.to_str();
2823     errs() << "Error: the context provided in -polly-context has not the same "
2824            << "number of dimensions than the computed context. Due to this "
2825            << "mismatch, the -polly-context option is ignored. Please provide "
2826            << "the context in the parameter space: " << SpaceStr << ".\n";
2827     return;
2828   }
2829
2830   for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
2831     std::string NameContext =
2832         scop->getContext().get_dim_name(isl::dim::param, i);
2833     std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2834
2835     if (NameContext != NameUserContext) {
2836       std::string SpaceStr = Space.to_str();
2837       errs() << "Error: the name of dimension " << i
2838              << " provided in -polly-context "
2839              << "is '" << NameUserContext << "', but the name in the computed "
2840              << "context is '" << NameContext
2841              << "'. Due to this name mismatch, "
2842              << "the -polly-context option is ignored. Please provide "
2843              << "the context in the parameter space: " << SpaceStr << ".\n";
2844       return;
2845     }
2846
2847     UserContext = UserContext.set_dim_id(isl::dim::param, i,
2848                                          Space.get_dim_id(isl::dim::param, i));
2849   }
2850   isl::set newContext = scop->getContext().intersect(UserContext);
2851   scop->setContext(newContext);
2852 }
2853
2854 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2855                                          isl::union_map Writes) {
2856   // TODO: Loads that are not loop carried, hence are in a statement with
2857   //       zero iterators, are by construction invariant, though we
2858   //       currently "hoist" them anyway. This is necessary because we allow
2859   //       them to be treated as parameters (e.g., in conditions) and our code
2860   //       generation would otherwise use the old value.
2861
2862   auto &Stmt = *Access->getStatement();
2863   BasicBlock *BB = Stmt.getEntryBlock();
2864
2865   if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2866       Access->isMemoryIntrinsic())
2867     return nullptr;
2868
2869   // Skip accesses that have an invariant base pointer which is defined but
2870   // not loaded inside the SCoP. This can happened e.g., if a readnone call
2871   // returns a pointer that is used as a base address. However, as we want
2872   // to hoist indirect pointers, we allow the base pointer to be defined in
2873   // the region if it is also a memory access. Each ScopArrayInfo object
2874   // that has a base pointer origin has a base pointer that is loaded and
2875   // that it is invariant, thus it will be hoisted too. However, if there is
2876   // no base pointer origin we check that the base pointer is defined
2877   // outside the region.
2878   auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2879   if (hasNonHoistableBasePtrInScop(Access, Writes))
2880     return nullptr;
2881
2882   isl::map AccessRelation = Access->getAccessRelation();
2883   assert(!AccessRelation.is_empty());
2884
2885   if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2886     return nullptr;
2887
2888   AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2889   isl::set SafeToLoad;
2890
2891   auto &DL = scop->getFunction().getParent()->getDataLayout();
2892   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2893                                   MaybeAlign(LI->getAlignment()), DL)) {
2894     SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2895   } else if (BB != LI->getParent()) {
2896     // Skip accesses in non-affine subregions as they might not be executed
2897     // under the same condition as the entry of the non-affine subregion.
2898     return nullptr;
2899   } else {
2900     SafeToLoad = AccessRelation.range();
2901   }
2902
2903   if (isAccessRangeTooComplex(AccessRelation.range()))
2904     return nullptr;
2905
2906   isl::union_map Written = Writes.intersect_range(SafeToLoad);
2907   isl::set WrittenCtx = Written.params();
2908   bool IsWritten = !WrittenCtx.is_empty();
2909
2910   if (!IsWritten)
2911     return WrittenCtx;
2912
2913   WrittenCtx = WrittenCtx.remove_divs();
2914   bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
2915   if (TooComplex || !isRequiredInvariantLoad(LI))
2916     return nullptr;
2917
2918   scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2919                       AS_RESTRICTION, LI->getParent());
2920   return WrittenCtx;
2921 }
2922
2923 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2924   for (const llvm::Argument &Arg : F.args())
2925     if (&Arg == maybeParam)
2926       return true;
2927
2928   return false;
2929 }
2930
2931 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2932                                      bool StmtInvalidCtxIsEmpty,
2933                                      bool MAInvalidCtxIsEmpty,
2934                                      bool NonHoistableCtxIsEmpty) {
2935   LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2936   const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2937   if (PollyAllowDereferenceOfAllFunctionParams &&
2938       isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2939     return true;
2940
2941   // TODO: We can provide more information for better but more expensive
2942   //       results.
2943   if (!isDereferenceableAndAlignedPointer(
2944           LInst->getPointerOperand(), LInst->getType(),
2945           MaybeAlign(LInst->getAlignment()), DL))
2946     return false;
2947
2948   // If the location might be overwritten we do not hoist it unconditionally.
2949   //
2950   // TODO: This is probably too conservative.
2951   if (!NonHoistableCtxIsEmpty)
2952     return false;
2953
2954   // If a dereferenceable load is in a statement that is modeled precisely we
2955   // can hoist it.
2956   if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2957     return true;
2958
2959   // Even if the statement is not modeled precisely we can hoist the load if it
2960   // does not involve any parameters that might have been specialized by the
2961   // statement domain.
2962   for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
2963     if (!isa<SCEVConstant>(MA->getSubscript(u)))
2964       return false;
2965   return true;
2966 }
2967
2968 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2969                                     InvariantAccessesTy &InvMAs) {
2970   if (InvMAs.empty())
2971     return;
2972
2973   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2974   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2975
2976   // Get the context under which the statement is executed but remove the error
2977   // context under which this statement is reached.
2978   isl::set DomainCtx = Stmt.getDomain().params();
2979   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2980
2981   if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
2982     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2983     scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2984     return;
2985   }
2986
2987   // Project out all parameters that relate to loads in the statement. Otherwise
2988   // we could have cyclic dependences on the constraints under which the
2989   // hoisted loads are executed and we could not determine an order in which to
2990   // pre-load them. This happens because not only lower bounds are part of the
2991   // domain but also upper bounds.
2992   for (auto &InvMA : InvMAs) {
2993     auto *MA = InvMA.MA;
2994     Instruction *AccInst = MA->getAccessInstruction();
2995     if (SE.isSCEVable(AccInst->getType())) {
2996       SetVector<Value *> Values;
2997       for (const SCEV *Parameter : scop->parameters()) {
2998         Values.clear();
2999         findValues(Parameter, SE, Values);
3000         if (!Values.count(AccInst))
3001           continue;
3002
3003         if (isl::id ParamId = scop->getIdForParam(Parameter)) {
3004           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3005           if (Dim >= 0)
3006             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3007         }
3008       }
3009     }
3010   }
3011
3012   for (auto &InvMA : InvMAs) {
3013     auto *MA = InvMA.MA;
3014     isl::set NHCtx = InvMA.NonHoistableCtx;
3015
3016     // Check for another invariant access that accesses the same location as
3017     // MA and if found consolidate them. Otherwise create a new equivalence
3018     // class at the end of InvariantEquivClasses.
3019     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3020     Type *Ty = LInst->getType();
3021     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
3022
3023     isl::set MAInvalidCtx = MA->getInvalidContext();
3024     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3025     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3026
3027     isl::set MACtx;
3028     // Check if we know that this pointer can be speculatively accessed.
3029     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3030                            NonHoistableCtxIsEmpty)) {
3031       MACtx = isl::set::universe(DomainCtx.get_space());
3032     } else {
3033       MACtx = DomainCtx;
3034       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3035       MACtx = MACtx.gist_params(scop->getContext());
3036     }
3037
3038     bool Consolidated = false;
3039     for (auto &IAClass : scop->invariantEquivClasses()) {
3040       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3041         continue;
3042
3043       // If the pointer and the type is equal check if the access function wrt.
3044       // to the domain is equal too. It can happen that the domain fixes
3045       // parameter values and these can be different for distinct part of the
3046       // SCoP. If this happens we cannot consolidate the loads but need to
3047       // create a new invariant load equivalence class.
3048       auto &MAs = IAClass.InvariantAccesses;
3049       if (!MAs.empty()) {
3050         auto *LastMA = MAs.front();
3051
3052         isl::set AR = MA->getAccessRelation().range();
3053         isl::set LastAR = LastMA->getAccessRelation().range();
3054         bool SameAR = AR.is_equal(LastAR);
3055
3056         if (!SameAR)
3057           continue;
3058       }
3059
3060       // Add MA to the list of accesses that are in this class.
3061       MAs.push_front(MA);
3062
3063       Consolidated = true;
3064
3065       // Unify the execution context of the class and this statement.
3066       isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3067       if (IAClassDomainCtx)
3068         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3069       else
3070         IAClassDomainCtx = MACtx;
3071       IAClass.ExecutionContext = IAClassDomainCtx;
3072       break;
3073     }
3074
3075     if (Consolidated)
3076       continue;
3077
3078     MACtx = MACtx.coalesce();
3079
3080     // If we did not consolidate MA, thus did not find an equivalence class
3081     // for it, we create a new one.
3082     scop->addInvariantEquivClass(
3083         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3084   }
3085 }
3086
3087 void ScopBuilder::collectCandidateReductionLoads(
3088     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
3089   ScopStmt *Stmt = StoreMA->getStatement();
3090
3091   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
3092   if (!Store)
3093     return;
3094
3095   // Skip if there is not one binary operator between the load and the store
3096   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
3097   if (!BinOp)
3098     return;
3099
3100   // Skip if the binary operators has multiple uses
3101   if (BinOp->getNumUses() != 1)
3102     return;
3103
3104   // Skip if the opcode of the binary operator is not commutative/associative
3105   if (!BinOp->isCommutative() || !BinOp->isAssociative())
3106     return;
3107
3108   // Skip if the binary operator is outside the current SCoP
3109   if (BinOp->getParent() != Store->getParent())
3110     return;
3111
3112   // Skip if it is a multiplicative reduction and we disabled them
3113   if (DisableMultiplicativeReductions &&
3114       (BinOp->getOpcode() == Instruction::Mul ||
3115        BinOp->getOpcode() == Instruction::FMul))
3116     return;
3117
3118   // Check the binary operator operands for a candidate load
3119   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
3120   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
3121   if (!PossibleLoad0 && !PossibleLoad1)
3122     return;
3123
3124   // A load is only a candidate if it cannot escape (thus has only this use)
3125   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
3126     if (PossibleLoad0->getParent() == Store->getParent())
3127       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
3128   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
3129     if (PossibleLoad1->getParent() == Store->getParent())
3130       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
3131 }
3132
3133 /// Find the canonical scop array info object for a set of invariant load
3134 /// hoisted loads. The canonical array is the one that corresponds to the
3135 /// first load in the list of accesses which is used as base pointer of a
3136 /// scop array.
3137 static const ScopArrayInfo *findCanonicalArray(Scop &S,
3138                                                MemoryAccessList &Accesses) {
3139   for (MemoryAccess *Access : Accesses) {
3140     const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3141         Access->getAccessInstruction(), MemoryKind::Array);
3142     if (CanonicalArray)
3143       return CanonicalArray;
3144   }
3145   return nullptr;
3146 }
3147
3148 /// Check if @p Array severs as base array in an invariant load.
3149 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3150   for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3151     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3152       if (Access2->getScopArrayInfo() == Array)
3153         return true;
3154   return false;
3155 }
3156
3157 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3158 /// with a reference to @p New.
3159 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3160                                  const ScopArrayInfo *New) {
3161   for (ScopStmt &Stmt : S)
3162     for (MemoryAccess *Access : Stmt) {
3163       if (Access->getLatestScopArrayInfo() != Old)
3164         continue;
3165
3166       isl::id Id = New->getBasePtrId();
3167       isl::map Map = Access->getAccessRelation();
3168       Map = Map.set_tuple_id(isl::dim::out, Id);
3169       Access->setAccessRelation(Map);
3170     }
3171 }
3172
3173 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3174   for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3175     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3176
3177     const ScopArrayInfo *CanonicalBasePtrSAI =
3178         findCanonicalArray(*scop, BasePtrAccesses);
3179
3180     if (!CanonicalBasePtrSAI)
3181       continue;
3182
3183     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3184       const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3185           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3186       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3187           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3188         continue;
3189
3190       // we currently do not canonicalize arrays where some accesses are
3191       // hoisted as invariant loads. If we would, we need to update the access
3192       // function of the invariant loads as well. However, as this is not a
3193       // very common situation, we leave this for now to avoid further
3194       // complexity increases.
3195       if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3196         continue;
3197
3198       replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3199     }
3200   }
3201 }
3202
3203 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3204   for (MemoryAccess *Access : Stmt.MemAccs) {
3205     Type *ElementType = Access->getElementType();
3206
3207     MemoryKind Ty;
3208     if (Access->isPHIKind())
3209       Ty = MemoryKind::PHI;
3210     else if (Access->isExitPHIKind())
3211       Ty = MemoryKind::ExitPHI;
3212     else if (Access->isValueKind())
3213       Ty = MemoryKind::Value;
3214     else
3215       Ty = MemoryKind::Array;
3216
3217     auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3218                                                ElementType, Access->Sizes, Ty);
3219     Access->buildAccessRelation(SAI);
3220     scop->addAccessData(Access);
3221   }
3222 }
3223
3224 /// Add the minimal/maximal access in @p Set to @p User.
3225 ///
3226 /// @return True if more accesses should be added, false if we reached the
3227 ///         maximal number of run-time checks to be generated.
3228 static bool buildMinMaxAccess(isl::set Set,
3229                               Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3230   isl::pw_multi_aff MinPMA, MaxPMA;
3231   isl::pw_aff LastDimAff;
3232   isl::aff OneAff;
3233   unsigned Pos;
3234
3235   Set = Set.remove_divs();
3236   polly::simplify(Set);
3237
3238   if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
3239     Set = Set.simple_hull();
3240
3241   // Restrict the number of parameters involved in the access as the lexmin/
3242   // lexmax computation will take too long if this number is high.
3243   //
3244   // Experiments with a simple test case using an i7 4800MQ:
3245   //
3246   //  #Parameters involved | Time (in sec)
3247   //            6          |     0.01
3248   //            7          |     0.04
3249   //            8          |     0.12
3250   //            9          |     0.40
3251   //           10          |     1.54
3252   //           11          |     6.78
3253   //           12          |    30.38
3254   //
3255   if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
3256     unsigned InvolvedParams = 0;
3257     for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3258       if (Set.involves_dims(isl::dim::param, u, 1))
3259         InvolvedParams++;
3260
3261     if (InvolvedParams > RunTimeChecksMaxParameters)
3262       return false;
3263   }
3264
3265   MinPMA = Set.lexmin_pw_multi_aff();
3266   MaxPMA = Set.lexmax_pw_multi_aff();
3267
3268   MinPMA = MinPMA.coalesce();
3269   MaxPMA = MaxPMA.coalesce();
3270
3271   // Adjust the last dimension of the maximal access by one as we want to
3272   // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3273   // we test during code generation might now point after the end of the
3274   // allocated array but we will never dereference it anyway.
3275   assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
3276          "Assumed at least one output dimension");
3277
3278   Pos = MaxPMA.dim(isl::dim::out) - 1;
3279   LastDimAff = MaxPMA.get_pw_aff(Pos);
3280   OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3281   OneAff = OneAff.add_constant_si(1);
3282   LastDimAff = LastDimAff.add(OneAff);
3283   MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3284
3285   if (!MinPMA || !MaxPMA)
3286     return false;
3287
3288   MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3289
3290   return true;
3291 }
3292
3293 /// Wrapper function to calculate minimal/maximal accesses to each array.
3294 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3295                                         Scop::MinMaxVectorTy &MinMaxAccesses) {
3296   MinMaxAccesses.reserve(AliasGroup.size());
3297
3298   isl::union_set Domains = scop->getDomains();
3299   isl::union_map Accesses = isl::union_map::empty(scop->getParamSpace());
3300
3301   for (MemoryAccess *MA : AliasGroup)
3302     Accesses = Accesses.add_map(MA->getAccessRelation());
3303
3304   Accesses = Accesses.intersect_domain(Domains);
3305   isl::union_set Locations = Accesses.range();
3306
3307   bool LimitReached = false;
3308   for (isl::set Set : Locations.get_set_list()) {
3309     LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3310     if (LimitReached)
3311       break;
3312   }
3313
3314   return !LimitReached;
3315 }
3316
3317 static isl::set getAccessDomain(MemoryAccess *MA) {
3318   isl::set Domain = MA->getStatement()->getDomain();
3319   Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
3320   return Domain.reset_tuple_id();
3321 }
3322
3323 bool ScopBuilder::buildAliasChecks() {
3324   if (!PollyUseRuntimeAliasChecks)
3325     return true;
3326
3327   if (buildAliasGroups()) {
3328     // Aliasing assumptions do not go through addAssumption but we still want to
3329     // collect statistics so we do it here explicitly.
3330     if (scop->getAliasGroups().size())
3331       Scop::incrementNumberOfAliasingAssumptions(1);
3332     return true;
3333   }
3334
3335   // If a problem occurs while building the alias groups we need to delete
3336   // this SCoP and pretend it wasn't valid in the first place. To this end
3337   // we make the assumed context infeasible.
3338   scop->invalidate(ALIASING, DebugLoc());
3339
3340   LLVM_DEBUG(
3341       dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3342              << " could not be created as the number of parameters involved "
3343                 "is too high. The SCoP will be "
3344                 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3345                 "the maximal number of parameters but be advised that the "
3346                 "compile time might increase exponentially.\n\n");
3347   return false;
3348 }
3349
3350 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3351 ScopBuilder::buildAliasGroupsForAccesses() {
3352   AliasSetTracker AST(AA);
3353
3354   DenseMap<Value *, MemoryAccess *> PtrToAcc;
3355   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3356   for (ScopStmt &Stmt : *scop) {
3357
3358     isl::set StmtDomain = Stmt.getDomain();
3359     bool StmtDomainEmpty = StmtDomain.is_empty();
3360
3361     // Statements with an empty domain will never be executed.
3362     if (StmtDomainEmpty)
3363       continue;
3364
3365     for (MemoryAccess *MA : Stmt) {
3366       if (MA->isScalarKind())
3367         continue;
3368       if (!MA->isRead())
3369         HasWriteAccess.insert(MA->getScopArrayInfo());
3370       MemAccInst Acc(MA->getAccessInstruction());
3371       if (MA->isRead() && isa<MemTransferInst>(Acc))
3372         PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3373       else
3374         PtrToAcc[Acc.getPointerOperand()] = MA;
3375       AST.add(Acc);
3376     }
3377   }
3378
3379   AliasGroupVectorTy AliasGroups;
3380   for (AliasSet &AS : AST) {
3381     if (AS.isMustAlias() || AS.isForwardingAliasSet())
3382       continue;
3383     AliasGroupTy AG;
3384     for (auto &PR : AS)
3385       AG.push_back(PtrToAcc[PR.getValue()]);
3386     if (AG.size() < 2)
3387       continue;
3388     AliasGroups.push_back(std::move(AG));
3389   }
3390
3391   return std::make_tuple(AliasGroups, HasWriteAccess);
3392 }
3393
3394 bool ScopBuilder::buildAliasGroups() {
3395   // To create sound alias checks we perform the following steps:
3396   //   o) We partition each group into read only and non read only accesses.
3397   //   o) For each group with more than one base pointer we then compute minimal
3398   //      and maximal accesses to each array of a group in read only and non
3399   //      read only partitions separately.
3400   AliasGroupVectorTy AliasGroups;
3401   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3402
3403   std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3404
3405   splitAliasGroupsByDomain(AliasGroups);
3406
3407   for (AliasGroupTy &AG : AliasGroups) {
3408     if (!scop->hasFeasibleRuntimeContext())
3409       return false;
3410
3411     {
3412       IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3413       bool Valid = buildAliasGroup(AG, HasWriteAccess);
3414       if (!Valid)
3415         return false;
3416     }
3417     if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3418       scop->invalidate(COMPLEXITY, DebugLoc());
3419       return false;
3420     }
3421   }
3422
3423   return true;
3424 }
3425
3426 bool ScopBuilder::buildAliasGroup(
3427     AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3428   AliasGroupTy ReadOnlyAccesses;
3429   AliasGroupTy ReadWriteAccesses;
3430   SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3431   SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3432
3433   if (AliasGroup.size() < 2)
3434     return true;
3435
3436   for (MemoryAccess *Access : AliasGroup) {
3437     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3438                                         Access->getAccessInstruction())
3439              << "Possibly aliasing pointer, use restrict keyword.");
3440     const ScopArrayInfo *Array = Access->getScopArrayInfo();
3441     if (HasWriteAccess.count(Array)) {
3442       ReadWriteArrays.insert(Array);
3443       ReadWriteAccesses.push_back(Access);
3444     } else {
3445       ReadOnlyArrays.insert(Array);
3446       ReadOnlyAccesses.push_back(Access);
3447     }
3448   }
3449
3450   // If there are no read-only pointers, and less than two read-write pointers,
3451   // no alias check is needed.
3452   if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3453     return true;
3454
3455   // If there is no read-write pointer, no alias check is needed.
3456   if (ReadWriteArrays.empty())
3457     return true;
3458
3459   // For non-affine accesses, no alias check can be generated as we cannot
3460   // compute a sufficiently tight lower and upper bound: bail out.
3461   for (MemoryAccess *MA : AliasGroup) {
3462     if (!MA->isAffine()) {
3463       scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3464                        MA->getAccessInstruction()->getParent());
3465       return false;
3466     }
3467   }
3468
3469   // Ensure that for all memory accesses for which we generate alias checks,
3470   // their base pointers are available.
3471   for (MemoryAccess *MA : AliasGroup) {
3472     if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3473       scop->addRequiredInvariantLoad(
3474           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3475   }
3476
3477   //  scop->getAliasGroups().emplace_back();
3478   //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3479   Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3480   Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3481
3482   bool Valid;
3483
3484   Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3485
3486   if (!Valid)
3487     return false;
3488
3489   // Bail out if the number of values we need to compare is too large.
3490   // This is important as the number of comparisons grows quadratically with
3491   // the number of values we need to compare.
3492   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3493       RunTimeChecksMaxArraysPerGroup)
3494     return false;
3495
3496   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3497
3498   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3499   if (!Valid)
3500     return false;
3501
3502   return true;
3503 }
3504
3505 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3506   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3507     AliasGroupTy NewAG;
3508     AliasGroupTy &AG = AliasGroups[u];
3509     AliasGroupTy::iterator AGI = AG.begin();
3510     isl::set AGDomain = getAccessDomain(*AGI);
3511     while (AGI != AG.end()) {
3512       MemoryAccess *MA = *AGI;
3513       isl::set MADomain = getAccessDomain(MA);
3514       if (AGDomain.is_disjoint(MADomain)) {
3515         NewAG.push_back(MA);
3516         AGI = AG.erase(AGI);
3517       } else {
3518         AGDomain = AGDomain.unite(MADomain);
3519         AGI++;
3520       }
3521     }
3522     if (NewAG.size() > 1)
3523       AliasGroups.push_back(std::move(NewAG));
3524   }
3525 }
3526
3527 #ifndef NDEBUG
3528 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3529   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3530   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3531   assert(PhysUse.getKind() == VirtUse.getKind());
3532 }
3533
3534 /// Check the consistency of every statement's MemoryAccesses.
3535 ///
3536 /// The check is carried out by expecting the "physical" kind of use (derived
3537 /// from the BasicBlocks instructions resides in) to be same as the "virtual"
3538 /// kind of use (derived from a statement's MemoryAccess).
3539 ///
3540 /// The "physical" uses are taken by ensureValueRead to determine whether to
3541 /// create MemoryAccesses. When done, the kind of scalar access should be the
3542 /// same no matter which way it was derived.
3543 ///
3544 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3545 /// can intentionally influence on the kind of uses (not corresponding to the
3546 /// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
3547 /// to pick up the virtual uses. But here in the code generator, this has not
3548 /// happened yet, such that virtual and physical uses are equivalent.
3549 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3550   for (auto *BB : S->getRegion().blocks()) {
3551     for (auto &Inst : *BB) {
3552       auto *Stmt = S->getStmtFor(&Inst);
3553       if (!Stmt)
3554         continue;
3555
3556       if (isIgnoredIntrinsic(&Inst))
3557         continue;
3558
3559       // Branch conditions are encoded in the statement domains.
3560       if (Inst.isTerminator() && Stmt->isBlockStmt())
3561         continue;
3562
3563       // Verify all uses.
3564       for (auto &Op : Inst.operands())
3565         verifyUse(S, Op, LI);
3566
3567       // Stores do not produce values used by other statements.
3568       if (isa<StoreInst>(Inst))
3569         continue;
3570
3571       // For every value defined in the block, also check that a use of that
3572       // value in the same statement would not be an inter-statement use. It can
3573       // still be synthesizable or load-hoisted, but these kind of instructions
3574       // are not directly copied in code-generation.
3575       auto VirtDef =
3576           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3577       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3578              VirtDef.getKind() == VirtualUse::Intra ||
3579              VirtDef.getKind() == VirtualUse::Hoisted);
3580     }
3581   }
3582
3583   if (S->hasSingleExitEdge())
3584     return;
3585
3586   // PHINodes in the SCoP region's exit block are also uses to be checked.
3587   if (!S->getRegion().isTopLevelRegion()) {
3588     for (auto &Inst : *S->getRegion().getExit()) {
3589       if (!isa<PHINode>(Inst))
3590         break;
3591
3592       for (auto &Op : Inst.operands())
3593         verifyUse(S, Op, LI);
3594     }
3595   }
3596 }
3597 #endif
3598
3599 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3600   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
3601
3602   buildStmts(R);
3603
3604   // Create all invariant load instructions first. These are categorized as
3605   // 'synthesizable', therefore are not part of any ScopStmt but need to be
3606   // created somewhere.
3607   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3608   for (BasicBlock *BB : scop->getRegion().blocks()) {
3609     if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
3610       continue;
3611
3612     for (Instruction &Inst : *BB) {
3613       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3614       if (!Load)
3615         continue;
3616
3617       if (!RIL.count(Load))
3618         continue;
3619
3620       // Invariant loads require a MemoryAccess to be created in some statement.
3621       // It is not important to which statement the MemoryAccess is added
3622       // because it will later be removed from the ScopStmt again. We chose the
3623       // first statement of the basic block the LoadInst is in.
3624       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3625       assert(!List.empty());
3626       ScopStmt *RILStmt = List.front();
3627       buildMemoryAccess(Load, RILStmt);
3628     }
3629   }
3630   buildAccessFunctions();
3631
3632   // In case the region does not have an exiting block we will later (during
3633   // code generation) split the exit block. This will move potential PHI nodes
3634   // from the current exit block into the new region exiting block. Hence, PHI
3635   // nodes that are at this point not part of the region will be.
3636   // To handle these PHI nodes later we will now model their operands as scalar
3637   // accesses. Note that we do not model anything in the exit block if we have
3638   // an exiting block in the region, as there will not be any splitting later.
3639   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3640     for (Instruction &Inst : *R.getExit()) {
3641       PHINode *PHI = dyn_cast<PHINode>(&Inst);
3642       if (!PHI)
3643         break;
3644
3645       buildPHIAccesses(nullptr, PHI, nullptr, true);
3646     }
3647   }
3648
3649   // Create memory accesses for global reads since all arrays are now known.
3650   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3651   for (auto GlobalReadPair : GlobalReads) {
3652     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3653     Instruction *GlobalRead = GlobalReadPair.second;
3654     for (auto *BP : ArrayBasePointers)
3655       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3656                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3657   }
3658
3659   buildInvariantEquivalenceClasses();
3660
3661   /// A map from basic blocks to their invalid domains.
3662   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3663
3664   if (!buildDomains(&R, InvalidDomainMap)) {
3665     LLVM_DEBUG(
3666         dbgs() << "Bailing-out because buildDomains encountered problems\n");
3667     return;
3668   }
3669
3670   addUserAssumptions(AC, InvalidDomainMap);
3671
3672   // Initialize the invalid domain.
3673   for (ScopStmt &Stmt : scop->Stmts)
3674     if (Stmt.isBlockStmt())
3675       Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3676     else
3677       Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3678           Stmt.getRegion()->getNode())]);
3679
3680   // Remove empty statements.
3681   // Exit early in case there are no executable statements left in this scop.
3682   scop->removeStmtNotInDomainMap();
3683   scop->simplifySCoP(false);
3684   if (scop->isEmpty()) {
3685     LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3686     return;
3687   }
3688
3689   // The ScopStmts now have enough information to initialize themselves.
3690   for (ScopStmt &Stmt : *scop) {
3691     collectSurroundingLoops(Stmt);
3692
3693     buildDomain(Stmt);
3694     buildAccessRelations(Stmt);
3695
3696     if (DetectReductions)
3697       checkForReductions(Stmt);
3698   }
3699
3700   // Check early for a feasible runtime context.
3701   if (!scop->hasFeasibleRuntimeContext()) {
3702     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
3703     return;
3704   }
3705
3706   // Check early for profitability. Afterwards it cannot change anymore,
3707   // only the runtime context could become infeasible.
3708   if (!scop->isProfitable(UnprofitableScalarAccs)) {
3709     scop->invalidate(PROFITABLE, DebugLoc());
3710     LLVM_DEBUG(
3711         dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3712     return;
3713   }
3714
3715   buildSchedule();
3716
3717   finalizeAccesses();
3718
3719   scop->realignParams();
3720   addUserContext();
3721
3722   // After the context was fully constructed, thus all our knowledge about
3723   // the parameters is in there, we add all recorded assumptions to the
3724   // assumed/invalid context.
3725   addRecordedAssumptions();
3726
3727   scop->simplifyContexts();
3728   if (!buildAliasChecks()) {
3729     LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3730     return;
3731   }
3732
3733   hoistInvariantLoads();
3734   canonicalizeDynamicBasePtrs();
3735   verifyInvariantLoads();
3736   scop->simplifySCoP(true);
3737
3738   // Check late for a feasible runtime context because profitability did not
3739   // change.
3740   if (!scop->hasFeasibleRuntimeContext()) {
3741     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3742     return;
3743   }
3744
3745 #ifndef NDEBUG
3746   verifyUses(scop.get(), LI, DT);
3747 #endif
3748 }
3749
3750 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
3751                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3752                          ScopDetection &SD, ScalarEvolution &SE,
3753                          OptimizationRemarkEmitter &ORE)
3754     : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3755   DebugLoc Beg, End;
3756   auto P = getBBPairForRegion(R);
3757   getDebugLocations(P, Beg, End);
3758
3759   std::string Msg = "SCoP begins here.";
3760   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3761            << Msg);
3762
3763   buildScop(*R, AC);
3764
3765   LLVM_DEBUG(dbgs() << *scop);
3766
3767   if (!scop->hasFeasibleRuntimeContext()) {
3768     InfeasibleScops++;
3769     Msg = "SCoP ends here but was dismissed.";
3770     LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3771     scop.reset();
3772   } else {
3773     Msg = "SCoP ends here.";
3774     ++ScopFound;
3775     if (scop->getMaxLoopDepth() > 0)
3776       ++RichScopFound;
3777   }
3778
3779   if (R->isTopLevelRegion())
3780     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3781              << Msg);
3782   else
3783     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3784              << Msg);
3785 }