lib/Transform/ForwardOpTree.cpp

   1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Move instructions between statements.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "polly/ForwardOpTree.h"
  15 #include "polly/Options.h"
  16 #include "polly/ScopBuilder.h"
  17 #include "polly/ScopInfo.h"
  18 #include "polly/ScopPass.h"
  19 #include "polly/Support/GICHelper.h"
  20 #include "polly/Support/ISLOStream.h"
  21 #include "polly/Support/ISLTools.h"
  22 #include "polly/Support/VirtualInstruction.h"
  23 #include "polly/ZoneAlgo.h"
  24 #include "llvm/ADT/STLExtras.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/ADT/Statistic.h"
  27 #include "llvm/Analysis/LoopInfo.h"
  28 #include "llvm/Analysis/ValueTracking.h"
  29 #include "llvm/IR/Instruction.h"
  30 #include "llvm/IR/Instructions.h"
  31 #include "llvm/IR/Value.h"
  32 #include "llvm/Pass.h"
  33 #include "llvm/Support/Casting.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include "llvm/Support/Compiler.h"
  36 #include "llvm/Support/Debug.h"
  37 #include "llvm/Support/ErrorHandling.h"
  38 #include "llvm/Support/raw_ostream.h"
  39 #include "isl/ctx.h"
  40 #include "isl/isl-noexceptions.h"
  41 #include <cassert>
  42 #include <memory>
  43
  44 #define DEBUG_TYPE "polly-optree"
  45
  46 using namespace llvm;
  47 using namespace polly;
  48
  49 static cl::opt<bool>
  50     AnalyzeKnown("polly-optree-analyze-known",
  51                  cl::desc("Analyze array contents for load forwarding"),
  52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
  53
  54 static cl::opt<bool>
  55     NormalizePHIs("polly-optree-normalize-phi",
  56                   cl::desc("Replace PHIs by their incoming values"),
  57                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
  58
  59 static cl::opt<unsigned>
  60     MaxOps("polly-optree-max-ops",
  61            cl::desc("Maximum number of ISL operations to invest for known "
  62                     "analysis; 0=no limit"),
  63            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
  64
  65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
  66 STATISTIC(KnownOutOfQuota,
  67           "Analyses aborted because max_operations was reached");
  68
  69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
  70 STATISTIC(TotalKnownLoadsForwarded,
  71           "Number of forwarded loads because their value was known");
  72 STATISTIC(TotalReloads, "Number of reloaded values");
  73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
  74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
  75 STATISTIC(TotalModifiedStmts,
  76           "Number of statements with at least one forwarded tree");
  77
  78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
  79
  80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
  81 STATISTIC(NumValueWritesInLoops,
  82           "Number of scalar value writes nested in affine loops after OpTree");
  83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
  84 STATISTIC(NumPHIWritesInLoops,
  85           "Number of scalar phi writes nested in affine loops after OpTree");
  86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
  87 STATISTIC(NumSingletonWritesInLoops,
  88           "Number of singleton writes nested in affine loops after OpTree");
  89
  90 namespace {
  91
  92 /// The state of whether an operand tree was/can be forwarded.
  93 ///
  94 /// The items apply to an instructions and its operand tree with the instruction
  95 /// as the root element. If the value in question is not an instruction in the
  96 /// SCoP, it can be a leaf of an instruction's operand tree.
  97 enum ForwardingDecision {
  98   /// The root instruction or value cannot be forwarded at all.
  99   FD_CannotForward,
 100
 101   /// The root instruction or value can be forwarded as a leaf of a larger
 102   /// operand tree.
 103   /// It does not make sense to move the value itself, it would just replace it
 104   /// by a use of itself. For instance, a constant "5" used in a statement can
 105   /// be forwarded, but it would just replace it by the same constant "5".
 106   /// However, it makes sense to move as an operand of
 107   ///
 108   ///   %add = add 5, 5
 109   ///
 110   /// where "5" is moved as part of a larger operand tree. "5" would be placed
 111   /// (disregarding for a moment that literal constants don't have a location
 112   /// and can be used anywhere) into the same statement as %add would.
 113   FD_CanForwardLeaf,
 114
 115   /// The root instruction can be forwarded and doing so avoids a scalar
 116   /// dependency.
 117   ///
 118   /// This can be either because the operand tree can be moved to the target
 119   /// statement, or a memory access is redirected to read from a different
 120   /// location.
 121   FD_CanForwardProfitably,
 122
 123   /// Used to indicate that a forwarding has be carried out successfully, and
 124   /// the forwarded memory access can be deleted.
 125   FD_DidForwardTree,
 126
 127   /// Used to indicate that a forwarding has be carried out successfully, and
 128   /// the forwarded memory access is being reused.
 129   FD_DidForwardLeaf,
 130
 131   /// A forwarding method cannot be applied to the operand tree.
 132   /// The difference to FD_CannotForward is that there might be other methods
 133   /// that can handle it.
 134   /// The conditions that make an operand tree applicable must be checked even
 135   /// with DoIt==true because a method following the one that returned
 136   /// FD_NotApplicable might have returned FD_CanForwardTree.
 137   FD_NotApplicable
 138 };
 139
 140 /// Implementation of operand tree forwarding for a specific SCoP.
 141 ///
 142 /// For a statement that requires a scalar value (through a value read
 143 /// MemoryAccess), see if its operand can be moved into the statement. If so,
 144 /// the MemoryAccess is removed and the all the operand tree instructions are
 145 /// moved into the statement. All original instructions are left in the source
 146 /// statements. The simplification pass can clean these up.
 147 class ForwardOpTreeImpl : ZoneAlgorithm {
 148 private:
 149   /// Scope guard to limit the number of isl operations for this pass.
 150   IslMaxOperationsGuard &MaxOpGuard;
 151
 152   /// How many instructions have been copied to other statements.
 153   int NumInstructionsCopied = 0;
 154
 155   /// Number of loads forwarded because their value was known.
 156   int NumKnownLoadsForwarded = 0;
 157
 158   /// Number of values reloaded from known array elements.
 159   int NumReloads = 0;
 160
 161   /// How many read-only accesses have been copied.
 162   int NumReadOnlyCopied = 0;
 163
 164   /// How many operand trees have been forwarded.
 165   int NumForwardedTrees = 0;
 166
 167   /// Number of statements with at least one forwarded operand tree.
 168   int NumModifiedStmts = 0;
 169
 170   /// Whether we carried out at least one change to the SCoP.
 171   bool Modified = false;
 172
 173   /// Contains the zones where array elements are known to contain a specific
 174   /// value.
 175   /// { [Element[] -> Zone[]] -> ValInst[] }
 176   /// @see computeKnown()
 177   isl::union_map Known;
 178
 179   /// Translator for newly introduced ValInsts to already existing ValInsts such
 180   /// that new introduced load instructions can reuse the Known analysis of its
 181   /// original load. { ValInst[] -> ValInst[] }
 182   isl::union_map Translator;
 183
 184   /// Get list of array elements that do contain the same ValInst[] at Domain[].
 185   ///
 186   /// @param ValInst { Domain[] -> ValInst[] }
 187   ///                The values for which we search for alternative locations,
 188   ///                per statement instance.
 189   ///
 190   /// @return { Domain[] -> Element[] }
 191   ///         For each statement instance, the array elements that contain the
 192   ///         same ValInst.
 193   isl::union_map findSameContentElements(isl::union_map ValInst) {
 194     assert(!ValInst.is_single_valued().is_false());
 195
 196     // { Domain[] }
 197     isl::union_set Domain = ValInst.domain();
 198
 199     // { Domain[] -> Scatter[] }
 200     isl::union_map Schedule = getScatterFor(Domain);
 201
 202     // { Element[] -> [Scatter[] -> ValInst[]] }
 203     isl::union_map MustKnownCurried =
 204         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
 205
 206     // { [Domain[] -> ValInst[]] -> Scatter[] }
 207     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
 208
 209     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
 210     isl::union_map SchedValDomVal =
 211         DomValSched.range_product(ValInst.range_map()).reverse();
 212
 213     // { Element[] -> [Domain[] -> ValInst[]] }
 214     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
 215
 216     // { Domain[] -> Element[] }
 217     isl::union_map MustKnownMap =
 218         MustKnownInst.uncurry().domain().unwrap().reverse();
 219     simplify(MustKnownMap);
 220
 221     return MustKnownMap;
 222   }
 223
 224   /// Find a single array element for each statement instance, within a single
 225   /// array.
 226   ///
 227   /// @param MustKnown { Domain[] -> Element[] }
 228   ///                  Set of candidate array elements.
 229   /// @param Domain    { Domain[] }
 230   ///                  The statement instance for which we need elements for.
 231   ///
 232   /// @return { Domain[] -> Element[] }
 233   ///         For each statement instance, an array element out of @p MustKnown.
 234   ///         All array elements must be in the same array (Polly does not yet
 235   ///         support reading from different accesses using the same
 236   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
 237   ///         null.
 238   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
 239     // { Domain[] -> Element[] }
 240     isl::map Result;
 241
 242     // MemoryAccesses can read only elements from a single array
 243     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
 244     // Look through all spaces until we find one that contains at least the
 245     // wanted statement instance.s
 246     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
 247       // Get the array this is accessing.
 248       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
 249       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
 250
 251       // No support for generation of indirect array accesses.
 252       if (SAI->getBasePtrOriginSAI())
 253         return isl::stat::ok; // continue
 254
 255       // Determine whether this map contains all wanted values.
 256       isl::set MapDom = Map.domain();
 257       if (!Domain.is_subset(MapDom).is_true())
 258         return isl::stat::ok; // continue
 259
 260       // There might be multiple array elements that contain the same value, but
 261       // choose only one of them. lexmin is used because it returns a one-value
 262       // mapping, we do not care about which one.
 263       // TODO: Get the simplest access function.
 264       Result = Map.lexmin();
 265       return isl::stat::error; // break
 266     });
 267
 268     return Result;
 269   }
 270
 271 public:
 272   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
 273       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
 274
 275   /// Compute the zones of known array element contents.
 276   ///
 277   /// @return True if the computed #Known is usable.
 278   bool computeKnownValues() {
 279     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
 280
 281     // Check that nothing strange occurs.
 282     collectCompatibleElts();
 283
 284     {
 285       IslQuotaScope QuotaScope = MaxOpGuard.enter();
 286
 287       computeCommon();
 288       if (NormalizePHIs)
 289         computeNormalizedPHIs();
 290       Known = computeKnown(true, true);
 291
 292       // Preexisting ValInsts use the known content analysis of themselves.
 293       Translator = makeIdentityMap(Known.range(), false);
 294     }
 295
 296     if (!Known || !Translator || !NormalizeMap) {
 297       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
 298       Known = nullptr;
 299       Translator = nullptr;
 300       NormalizeMap = nullptr;
 301       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
 302       return false;
 303     }
 304
 305     KnownAnalyzed++;
 306     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
 307
 308     return true;
 309   }
 310
 311   void printStatistics(raw_ostream &OS, int Indent = 0) {
 312     OS.indent(Indent) << "Statistics {\n";
 313     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
 314                           << '\n';
 315     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
 316                           << '\n';
 317     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
 318     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
 319                           << '\n';
 320     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
 321                           << '\n';
 322     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
 323                           << NumModifiedStmts << '\n';
 324     OS.indent(Indent) << "}\n";
 325   }
 326
 327   void printStatements(raw_ostream &OS, int Indent = 0) const {
 328     OS.indent(Indent) << "After statements {\n";
 329     for (auto &Stmt : *S) {
 330       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
 331       for (auto *MA : Stmt)
 332         MA->print(OS);
 333
 334       OS.indent(Indent + 12);
 335       Stmt.printInstructions(OS);
 336     }
 337     OS.indent(Indent) << "}\n";
 338   }
 339
 340   /// Create a new MemoryAccess of type read and MemoryKind::Array.
 341   ///
 342   /// @param Stmt           The statement in which the access occurs.
 343   /// @param LI             The instruction that does the access.
 344   /// @param AccessRelation The array element that each statement instance
 345   ///                       accesses.
 346   ///
 347   /// @param The newly created access.
 348   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
 349                                     isl::map AccessRelation) {
 350     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
 351     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
 352
 353     // Create a dummy SCEV access, to be replaced anyway.
 354     SmallVector<const SCEV *, 4> Sizes;
 355     Sizes.reserve(SAI->getNumberOfDimensions());
 356     SmallVector<const SCEV *, 4> Subscripts;
 357     Subscripts.reserve(SAI->getNumberOfDimensions());
 358     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
 359       Sizes.push_back(SAI->getDimensionSize(i));
 360       Subscripts.push_back(nullptr);
 361     }
 362
 363     MemoryAccess *Access =
 364         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
 365                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
 366     S->addAccessFunction(Access);
 367     Stmt->addAccess(Access, true);
 368
 369     Access->setNewAccessRelation(AccessRelation);
 370
 371     return Access;
 372   }
 373
 374   /// Forward a load by reading from an array element that contains the same
 375   /// value. Typically the location it was loaded from.
 376   ///
 377   /// @param TargetStmt  The statement the operand tree will be copied to.
 378   /// @param Inst        The (possibly speculatable) instruction to forward.
 379   /// @param UseStmt     The statement that uses @p Inst.
 380   /// @param UseLoop     The loop @p Inst is used in.
 381   /// @param DefStmt     The statement @p Inst is defined in.
 382   /// @param DefLoop     The loop which contains @p Inst.
 383   /// @param DoIt        If false, only determine whether an operand tree can be
 384   ///                    forwarded. If true, carry out the forwarding. Do not
 385   ///                    use DoIt==true if an operand tree is not known to be
 386   ///                    forwardable.
 387   ///
 388   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
 389   ///                           load.
 390   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
 391   ///         FD_CanForwardProfitably if @p Inst is forwardable.
 392   ///         FD_DidForwardTree if @p DoIt was true.
 393   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
 394                                       ScopStmt *UseStmt, Loop *UseLoop,
 395                                       ScopStmt *DefStmt, Loop *DefLoop,
 396                                       bool DoIt) {
 397     // Cannot do anything without successful known analysis.
 398     if (Known.is_null() || Translator.is_null() ||
 399         MaxOpGuard.hasQuotaExceeded())
 400       return FD_NotApplicable;
 401
 402     LoadInst *LI = dyn_cast<LoadInst>(Inst);
 403     if (!LI)
 404       return FD_NotApplicable;
 405
 406     // If the load is already in the statement, no forwarding is necessary.
 407     // However, it might happen that the LoadInst is already present in the
 408     // statement's instruction list. In that case we do as follows:
 409     // - For the evaluation (DoIt==false), we can trivially forward it as it is
 410     //   benefit of forwarding an already present instruction.
 411     // - For the execution (DoIt==true), prepend the instruction (to make it
 412     //   available to all instructions following in the instruction list), but
 413     //   do not add another MemoryAccess.
 414     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
 415     if (Access && !DoIt)
 416       return FD_CanForwardProfitably;
 417
 418     ForwardingDecision OpDecision = forwardTree(
 419         TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
 420     switch (OpDecision) {
 421     case FD_CannotForward:
 422       assert(!DoIt);
 423       return OpDecision;
 424
 425     case FD_CanForwardLeaf:
 426     case FD_CanForwardProfitably:
 427       assert(!DoIt);
 428       break;
 429
 430     case FD_DidForwardLeaf:
 431     case FD_DidForwardTree:
 432       assert(DoIt);
 433       break;
 434
 435     default:
 436       llvm_unreachable("Shouldn't return this");
 437     }
 438
 439     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
 440
 441     // { DomainDef[] -> ValInst[] }
 442     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
 443     assert(!isNormalized(ExpectedVal).is_false() &&
 444            "LoadInsts are always normalized");
 445
 446     // { DomainUse[] -> DomainTarget[] }
 447     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
 448
 449     // { DomainTarget[] -> ValInst[] }
 450     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
 451     isl::union_map TranslatedExpectedVal =
 452         isl::union_map(TargetExpectedVal).apply_range(Translator);
 453
 454     // { DomainTarget[] -> Element[] }
 455     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
 456
 457     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
 458     if (!SameVal)
 459       return FD_NotApplicable;
 460
 461     if (DoIt)
 462       TargetStmt->prependInstruction(LI);
 463
 464     if (!DoIt)
 465       return FD_CanForwardProfitably;
 466
 467     if (Access) {
 468       LLVM_DEBUG(
 469           dbgs() << "    forwarded known load with preexisting MemoryAccess"
 470                  << Access << "\n");
 471     } else {
 472       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
 473       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
 474                         << Access << "\n");
 475
 476       // { ValInst[] }
 477       isl::space ValInstSpace = ExpectedVal.get_space().range();
 478
 479       // After adding a new load to the SCoP, also update the Known content
 480       // about it. The new load will have a known ValInst of
 481       // { [DomainTarget[] -> Value[]] }
 482       // but which -- because it is a copy of it -- has same value as the
 483       // { [DomainDef[] -> Value[]] }
 484       // that it replicates. Instead of  cloning the known content of
 485       // [DomainDef[] -> Value[]]
 486       // for DomainTarget[], we add a 'translator' that maps
 487       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
 488       // before comparing to the known content.
 489       // TODO: 'Translator' could also be used to map PHINodes to their incoming
 490       // ValInsts.
 491       if (ValInstSpace.is_wrapping()) {
 492         // { DefDomain[] -> Value[] }
 493         isl::map ValInsts = ExpectedVal.range().unwrap();
 494
 495         // { DefDomain[] }
 496         isl::set DefDomain = ValInsts.domain();
 497
 498         // { Value[] }
 499         isl::space ValSpace = ValInstSpace.unwrap().range();
 500
 501         // { Value[] -> Value[] }
 502         isl::map ValToVal =
 503             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
 504
 505         // { DomainDef[] -> DomainTarget[] }
 506         isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
 507
 508         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
 509         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
 510
 511         Translator = Translator.add_map(LocalTranslator);
 512         LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
 513                           << "\n");
 514       }
 515     }
 516     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
 517                       << "\n");
 518     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
 519                       << "\n");
 520     assert(Access);
 521
 522     NumKnownLoadsForwarded++;
 523     TotalKnownLoadsForwarded++;
 524     return FD_DidForwardTree;
 525   }
 526
 527   /// Forward a scalar by redirecting the access to an array element that stores
 528   /// the same value.
 529   ///
 530   /// @param TargetStmt  The statement the operand tree will be copied to.
 531   /// @param Inst        The scalar to forward.
 532   /// @param UseStmt     The statement that uses @p Inst.
 533   /// @param UseLoop     The loop @p Inst is used in.
 534   /// @param DefStmt     The statement @p Inst is defined in.
 535   /// @param DefLoop     The loop which contains @p Inst.
 536   /// @param DoIt        If false, only determine whether an operand tree can be
 537   ///                    forwarded. If true, carry out the forwarding. Do not
 538   ///                    use DoIt==true if an operand tree is not known to be
 539   ///                    forwardable.
 540   ///
 541   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
 542   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
 543   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
 544   ///         FD_DidForwardLeaf       if @p DoIt was true.
 545   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
 546                                         ScopStmt *UseStmt, Loop *UseLoop,
 547                                         ScopStmt *DefStmt, Loop *DefLoop,
 548                                         bool DoIt) {
 549     // Cannot do anything without successful known analysis.
 550     if (Known.is_null() || Translator.is_null() ||
 551         MaxOpGuard.hasQuotaExceeded())
 552       return FD_NotApplicable;
 553
 554     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
 555     if (Access && Access->isLatestArrayKind()) {
 556       if (DoIt)
 557         return FD_DidForwardLeaf;
 558       return FD_CanForwardLeaf;
 559     }
 560
 561     // Don't spend too much time analyzing whether it can be reloaded. When
 562     // carrying-out the forwarding, we cannot bail-out in the middle of the
 563     // transformation. It also shouldn't take as long because some results are
 564     // cached.
 565     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
 566
 567     // { DomainDef[] -> ValInst[] }
 568     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
 569
 570     // { DomainUse[] -> DomainTarget[] }
 571     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
 572
 573     // { DomainTarget[] -> ValInst[] }
 574     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
 575     isl::union_map TranslatedExpectedVal =
 576         TargetExpectedVal.apply_range(Translator);
 577
 578     // { DomainTarget[] -> Element[] }
 579     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
 580
 581     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
 582     if (!SameVal)
 583       return FD_NotApplicable;
 584
 585     if (!DoIt)
 586       return FD_CanForwardProfitably;
 587
 588     if (!Access)
 589       Access = TargetStmt->ensureValueRead(Inst);
 590
 591     simplify(SameVal);
 592     Access->setNewAccessRelation(SameVal);
 593
 594     TotalReloads++;
 595     NumReloads++;
 596     return FD_DidForwardLeaf;
 597   }
 598
 599   /// Forwards a speculatively executable instruction.
 600   ///
 601   /// @param TargetStmt  The statement the operand tree will be copied to.
 602   /// @param UseInst     The (possibly speculatable) instruction to forward.
 603   /// @param DefStmt     The statement @p UseInst is defined in.
 604   /// @param DefLoop     The loop which contains @p UseInst.
 605   /// @param DoIt        If false, only determine whether an operand tree can be
 606   ///                    forwarded. If true, carry out the forwarding. Do not
 607   ///                    use DoIt==true if an operand tree is not known to be
 608   ///                    forwardable.
 609   ///
 610   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
 611   ///         FD_CannotForward  if one of @p UseInst's operands is not
 612   ///                           forwardable.
 613   ///         FD_CanForwardTree if @p UseInst is forwardable.
 614   ///         FD_DidForward     if @p DoIt was true.
 615   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
 616                                          Instruction *UseInst,
 617                                          ScopStmt *DefStmt, Loop *DefLoop,
 618                                          bool DoIt) {
 619     // PHIs, unless synthesizable, are not yet supported.
 620     if (isa<PHINode>(UseInst))
 621       return FD_NotApplicable;
 622
 623     // Compatible instructions must satisfy the following conditions:
 624     // 1. Idempotent (instruction will be copied, not moved; although its
 625     //    original instance might be removed by simplification)
 626     // 2. Not access memory (There might be memory writes between)
 627     // 3. Not cause undefined behaviour (we might copy to a location when the
 628     //    original instruction was no executed; this is currently not possible
 629     //    because we do not forward PHINodes)
 630     // 4. Not leak memory if executed multiple times (i.e. malloc)
 631     //
 632     // Instruction::mayHaveSideEffects is not sufficient because it considers
 633     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
 634     // not sufficient because it allows memory accesses.
 635     if (mayBeMemoryDependent(*UseInst))
 636       return FD_NotApplicable;
 637
 638     if (DoIt) {
 639       // To ensure the right order, prepend this instruction before its
 640       // operands. This ensures that its operands are inserted before the
 641       // instruction using them.
 642       // TODO: The operand tree is not really a tree, but a DAG. We should be
 643       // able to handle DAGs without duplication.
 644       TargetStmt->prependInstruction(UseInst);
 645       NumInstructionsCopied++;
 646       TotalInstructionsCopied++;
 647     }
 648
 649     for (Value *OpVal : UseInst->operand_values()) {
 650       ForwardingDecision OpDecision =
 651           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
 652       switch (OpDecision) {
 653       case FD_CannotForward:
 654         assert(!DoIt);
 655         return FD_CannotForward;
 656
 657       case FD_CanForwardLeaf:
 658       case FD_CanForwardProfitably:
 659         assert(!DoIt);
 660         break;
 661
 662       case FD_DidForwardLeaf:
 663       case FD_DidForwardTree:
 664         assert(DoIt);
 665         break;
 666
 667       case FD_NotApplicable:
 668         llvm_unreachable("forwardTree should never return FD_NotApplicable");
 669       }
 670     }
 671
 672     if (DoIt)
 673       return FD_DidForwardTree;
 674     return FD_CanForwardProfitably;
 675   }
 676
 677   /// Determines whether an operand tree can be forwarded or carries out a
 678   /// forwarding, depending on the @p DoIt flag.
 679   ///
 680   /// @param TargetStmt  The statement the operand tree will be copied to.
 681   /// @param UseVal      The value (usually an instruction) which is root of an
 682   ///                    operand tree.
 683   /// @param UseStmt     The statement that uses @p UseVal.
 684   /// @param UseLoop     The loop @p UseVal is used in.
 685   /// @param DoIt        If false, only determine whether an operand tree can be
 686   ///                    forwarded. If true, carry out the forwarding. Do not
 687   ///                    use DoIt==true if an operand tree is not known to be
 688   ///                    forwardable.
 689   ///
 690   /// @return If DoIt==false, return whether the operand tree can be forwarded.
 691   ///         If DoIt==true, return FD_DidForward.
 692   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
 693                                  ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
 694     ScopStmt *DefStmt = nullptr;
 695     Loop *DefLoop = nullptr;
 696
 697     // { DefDomain[] -> TargetDomain[] }
 698     isl::map DefToTarget;
 699
 700     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
 701     switch (VUse.getKind()) {
 702     case VirtualUse::Constant:
 703     case VirtualUse::Block:
 704     case VirtualUse::Hoisted:
 705       // These can be used anywhere without special considerations.
 706       if (DoIt)
 707         return FD_DidForwardTree;
 708       return FD_CanForwardLeaf;
 709
 710     case VirtualUse::Synthesizable: {
 711       // ScopExpander will take care for of generating the code at the new
 712       // location.
 713       if (DoIt)
 714         return FD_DidForwardTree;
 715
 716       // Check if the value is synthesizable at the new location as well. This
 717       // might be possible when leaving a loop for which ScalarEvolution is
 718       // unable to derive the exit value for.
 719       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
 720       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
 721       // do not forward past its loop header. This would require us to use a
 722       // previous loop induction variable instead the current one. We currently
 723       // do not allow forwarding PHI nodes, thus this should never occur (the
 724       // only exception where no phi is necessary being an unreachable loop
 725       // without edge from the outside).
 726       VirtualUse TargetUse = VirtualUse::create(
 727           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
 728       if (TargetUse.getKind() == VirtualUse::Synthesizable)
 729         return FD_CanForwardLeaf;
 730
 731       LLVM_DEBUG(
 732           dbgs() << "    Synthesizable would not be synthesizable anymore: "
 733                  << *UseVal << "\n");
 734       return FD_CannotForward;
 735     }
 736
 737     case VirtualUse::ReadOnly:
 738       // Note that we cannot return FD_CanForwardTree here. With a operand tree
 739       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
 740       // With -polly-analyze-read-only-scalars=true we would ensure the
 741       // existence of a MemoryAccess (which already exists for a leaf) and be
 742       // removed again by tryForwardTree because it's goal is to remove this
 743       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
 744       // to do so.
 745       if (!DoIt)
 746         return FD_CanForwardLeaf;
 747
 748       // If we model read-only scalars, we need to create a MemoryAccess for it.
 749       if (ModelReadOnlyScalars)
 750         TargetStmt->ensureValueRead(UseVal);
 751
 752       NumReadOnlyCopied++;
 753       TotalReadOnlyCopied++;
 754       return FD_DidForwardLeaf;
 755
 756     case VirtualUse::Intra:
 757       // Knowing that UseStmt and DefStmt are the same statement instance, just
 758       // reuse the information about UseStmt for DefStmt
 759       DefStmt = UseStmt;
 760
 761       LLVM_FALLTHROUGH;
 762     case VirtualUse::Inter:
 763       Instruction *Inst = cast<Instruction>(UseVal);
 764
 765       if (!DefStmt) {
 766         DefStmt = S->getStmtFor(Inst);
 767         if (!DefStmt)
 768           return FD_CannotForward;
 769       }
 770
 771       DefLoop = LI->getLoopFor(Inst->getParent());
 772
 773       ForwardingDecision SpeculativeResult =
 774           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
 775       if (SpeculativeResult != FD_NotApplicable)
 776         return SpeculativeResult;
 777
 778       ForwardingDecision KnownResult = forwardKnownLoad(
 779           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
 780       if (KnownResult != FD_NotApplicable)
 781         return KnownResult;
 782
 783       ForwardingDecision ReloadResult = reloadKnownContent(
 784           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
 785       if (ReloadResult != FD_NotApplicable)
 786         return ReloadResult;
 787
 788       // When no method is found to forward the operand tree, we effectively
 789       // cannot handle it.
 790       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
 791       return FD_CannotForward;
 792     }
 793
 794     llvm_unreachable("Case unhandled");
 795   }
 796
 797   /// Try to forward an operand tree rooted in @p RA.
 798   bool tryForwardTree(MemoryAccess *RA) {
 799     assert(RA->isLatestScalarKind());
 800     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
 801
 802     ScopStmt *Stmt = RA->getStatement();
 803     Loop *InLoop = Stmt->getSurroundingLoop();
 804
 805     isl::map TargetToUse;
 806     if (!Known.is_null()) {
 807       isl::space DomSpace = Stmt->getDomainSpace();
 808       TargetToUse =
 809           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
 810     }
 811
 812     ForwardingDecision Assessment =
 813         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
 814     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
 815     if (Assessment != FD_CanForwardProfitably)
 816       return false;
 817
 818     ForwardingDecision Execution =
 819         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
 820     assert(((Execution == FD_DidForwardTree) ||
 821             (Execution == FD_DidForwardLeaf)) &&
 822            "A previous positive assessment must also be executable");
 823
 824     if (Execution == FD_DidForwardTree)
 825       Stmt->removeSingleMemoryAccess(RA);
 826     return true;
 827   }
 828
 829   /// Return which SCoP this instance is processing.
 830   Scop *getScop() const { return S; }
 831
 832   /// Run the algorithm: Use value read accesses as operand tree roots and try
 833   /// to forward them into the statement.
 834   bool forwardOperandTrees() {
 835     for (ScopStmt &Stmt : *S) {
 836       bool StmtModified = false;
 837
 838       // Because we are modifying the MemoryAccess list, collect them first to
 839       // avoid iterator invalidation.
 840       SmallVector<MemoryAccess *, 16> Accs;
 841       for (MemoryAccess *RA : Stmt) {
 842         if (!RA->isRead())
 843           continue;
 844         if (!RA->isLatestScalarKind())
 845           continue;
 846
 847         Accs.push_back(RA);
 848       }
 849
 850       for (MemoryAccess *RA : Accs) {
 851         if (tryForwardTree(RA)) {
 852           Modified = true;
 853           StmtModified = true;
 854           NumForwardedTrees++;
 855           TotalForwardedTrees++;
 856         }
 857       }
 858
 859       if (StmtModified) {
 860         NumModifiedStmts++;
 861         TotalModifiedStmts++;
 862       }
 863     }
 864
 865     if (Modified)
 866       ScopsModified++;
 867     return Modified;
 868   }
 869
 870   /// Print the pass result, performed transformations and the SCoP after the
 871   /// transformation.
 872   void print(raw_ostream &OS, int Indent = 0) {
 873     printStatistics(OS, Indent);
 874
 875     if (!Modified) {
 876       // This line can easily be checked in regression tests.
 877       OS << "ForwardOpTree executed, but did not modify anything\n";
 878       return;
 879     }
 880
 881     printStatements(OS, Indent);
 882   }
 883 };
 884
 885 /// Pass that redirects scalar reads to array elements that are known to contain
 886 /// the same value.
 887 ///
 888 /// This reduces the number of scalar accesses and therefore potentially
 889 /// increases the freedom of the scheduler. In the ideal case, all reads of a
 890 /// scalar definition are redirected (We currently do not care about removing
 891 /// the write in this case).  This is also useful for the main DeLICM pass as
 892 /// there are less scalars to be mapped.
 893 class ForwardOpTree : public ScopPass {
 894 private:
 895   /// The pass implementation, also holding per-scop data.
 896   std::unique_ptr<ForwardOpTreeImpl> Impl;
 897
 898 public:
 899   static char ID;
 900
 901   explicit ForwardOpTree() : ScopPass(ID) {}
 902   ForwardOpTree(const ForwardOpTree &) = delete;
 903   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
 904
 905   void getAnalysisUsage(AnalysisUsage &AU) const override {
 906     AU.addRequiredTransitive<ScopInfoRegionPass>();
 907     AU.addRequired<LoopInfoWrapperPass>();
 908     AU.setPreservesAll();
 909   }
 910
 911   bool runOnScop(Scop &S) override {
 912     // Free resources for previous SCoP's computation, if not yet done.
 913     releaseMemory();
 914
 915     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 916
 917     {
 918       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
 919       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
 920
 921       if (AnalyzeKnown) {
 922         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
 923         Impl->computeKnownValues();
 924       }
 925
 926       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
 927       Impl->forwardOperandTrees();
 928
 929       if (MaxOpGuard.hasQuotaExceeded()) {
 930         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
 931                              "max_operations exceeded\n");
 932         KnownOutOfQuota++;
 933       }
 934     }
 935
 936     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
 937     LLVM_DEBUG(dbgs() << S);
 938
 939     // Update statistics
 940     auto ScopStats = S.getStatistics();
 941     NumValueWrites += ScopStats.NumValueWrites;
 942     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
 943     NumPHIWrites += ScopStats.NumPHIWrites;
 944     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
 945     NumSingletonWrites += ScopStats.NumSingletonWrites;
 946     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
 947
 948     return false;
 949   }
 950
 951   void printScop(raw_ostream &OS, Scop &S) const override {
 952     if (!Impl)
 953       return;
 954
 955     assert(Impl->getScop() == &S);
 956     Impl->print(OS);
 957   }
 958
 959   void releaseMemory() override { Impl.reset(); }
 960 }; // class ForwardOpTree
 961
 962 char ForwardOpTree::ID;
 963 } // namespace
 964
 965 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
 966
 967 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
 968                       "Polly - Forward operand tree", false, false)
 969 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 970 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
 971                     "Polly - Forward operand tree", false, false)