lib/Transform/ForwardOpTree.cpp

   1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Move instructions between statements.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "polly/ForwardOpTree.h"
  15 #include "polly/Options.h"
  16 #include "polly/ScopBuilder.h"
  17 #include "polly/ScopInfo.h"
  18 #include "polly/ScopPass.h"
  19 #include "polly/Support/GICHelper.h"
  20 #include "polly/Support/ISLOStream.h"
  21 #include "polly/Support/ISLTools.h"
  22 #include "polly/Support/VirtualInstruction.h"
  23 #include "polly/ZoneAlgo.h"
  24 #include "llvm/ADT/STLExtras.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/ADT/Statistic.h"
  27 #include "llvm/Analysis/LoopInfo.h"
  28 #include "llvm/Analysis/ValueTracking.h"
  29 #include "llvm/IR/Instruction.h"
  30 #include "llvm/IR/Instructions.h"
  31 #include "llvm/IR/Value.h"
  32 #include "llvm/Pass.h"
  33 #include "llvm/Support/Casting.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include "llvm/Support/Compiler.h"
  36 #include "llvm/Support/Debug.h"
  37 #include "llvm/Support/ErrorHandling.h"
  38 #include "llvm/Support/raw_ostream.h"
  39 #include "isl/ctx.h"
  40 #include "isl/isl-noexceptions.h"
  41 #include <cassert>
  42 #include <memory>
  43
  44 #define DEBUG_TYPE "polly-optree"
  45
  46 using namespace llvm;
  47 using namespace polly;
  48
  49 static cl::opt<bool>
  50     AnalyzeKnown("polly-optree-analyze-known",
  51                  cl::desc("Analyze array contents for load forwarding"),
  52                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
  53
  54 static cl::opt<unsigned>
  55     MaxOps("polly-optree-max-ops",
  56            cl::desc("Maximum number of ISL operations to invest for known "
  57                     "analysis; 0=no limit"),
  58            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
  59
  60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
  61 STATISTIC(KnownOutOfQuota,
  62           "Analyses aborted because max_operations was reached");
  63
  64 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
  65 STATISTIC(TotalKnownLoadsForwarded,
  66           "Number of forwarded loads because their value was known");
  67 STATISTIC(TotalReloads, "Number of reloaded values");
  68 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
  69 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
  70 STATISTIC(TotalModifiedStmts,
  71           "Number of statements with at least one forwarded tree");
  72
  73 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
  74
  75 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
  76 STATISTIC(NumValueWritesInLoops,
  77           "Number of scalar value writes nested in affine loops after OpTree");
  78 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
  79 STATISTIC(NumPHIWritesInLoops,
  80           "Number of scalar phi writes nested in affine loops after OpTree");
  81 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
  82 STATISTIC(NumSingletonWritesInLoops,
  83           "Number of singleton writes nested in affine loops after OpTree");
  84
  85 namespace {
  86
  87 /// The state of whether an operand tree was/can be forwarded.
  88 ///
  89 /// The items apply to an instructions and its operand tree with the instruction
  90 /// as the root element. If the value in question is not an instruction in the
  91 /// SCoP, it can be a leaf of an instruction's operand tree.
  92 enum ForwardingDecision {
  93   /// The root instruction or value cannot be forwarded at all.
  94   FD_CannotForward,
  95
  96   /// The root instruction or value can be forwarded as a leaf of a larger
  97   /// operand tree.
  98   /// It does not make sense to move the value itself, it would just replace it
  99   /// by a use of itself. For instance, a constant "5" used in a statement can
 100   /// be forwarded, but it would just replace it by the same constant "5".
 101   /// However, it makes sense to move as an operand of
 102   ///
 103   ///   %add = add 5, 5
 104   ///
 105   /// where "5" is moved as part of a larger operand tree. "5" would be placed
 106   /// (disregarding for a moment that literal constants don't have a location
 107   /// and can be used anywhere) into the same statement as %add would.
 108   FD_CanForwardLeaf,
 109
 110   /// The root instruction can be forwarded and doing so avoids a scalar
 111   /// dependency.
 112   ///
 113   /// This can be either because the operand tree can be moved to the target
 114   /// statement, or a memory access is redirected to read from a different
 115   /// location.
 116   FD_CanForwardProfitably,
 117
 118   /// Used to indicate that a forwarding has be carried out successfully, and
 119   /// the forwarded memory access can be deleted.
 120   FD_DidForwardTree,
 121
 122   /// Used to indicate that a forwarding has be carried out successfully, and
 123   /// the forwarded memory access is being reused.
 124   FD_DidForwardLeaf,
 125
 126   /// A forwarding method cannot be applied to the operand tree.
 127   /// The difference to FD_CannotForward is that there might be other methods
 128   /// that can handle it.
 129   /// The conditions that make an operand tree applicable must be checked even
 130   /// with DoIt==true because a method following the one that returned
 131   /// FD_NotApplicable might have returned FD_CanForwardTree.
 132   FD_NotApplicable
 133 };
 134
 135 /// Implementation of operand tree forwarding for a specific SCoP.
 136 ///
 137 /// For a statement that requires a scalar value (through a value read
 138 /// MemoryAccess), see if its operand can be moved into the statement. If so,
 139 /// the MemoryAccess is removed and the all the operand tree instructions are
 140 /// moved into the statement. All original instructions are left in the source
 141 /// statements. The simplification pass can clean these up.
 142 class ForwardOpTreeImpl : ZoneAlgorithm {
 143 private:
 144   /// Scope guard to limit the number of isl operations for this pass.
 145   IslMaxOperationsGuard &MaxOpGuard;
 146
 147   /// How many instructions have been copied to other statements.
 148   int NumInstructionsCopied = 0;
 149
 150   /// Number of loads forwarded because their value was known.
 151   int NumKnownLoadsForwarded = 0;
 152
 153   /// Number of values reloaded from known array elements.
 154   int NumReloads = 0;
 155
 156   /// How many read-only accesses have been copied.
 157   int NumReadOnlyCopied = 0;
 158
 159   /// How many operand trees have been forwarded.
 160   int NumForwardedTrees = 0;
 161
 162   /// Number of statements with at least one forwarded operand tree.
 163   int NumModifiedStmts = 0;
 164
 165   /// Whether we carried out at least one change to the SCoP.
 166   bool Modified = false;
 167
 168   /// Contains the zones where array elements are known to contain a specific
 169   /// value.
 170   /// { [Element[] -> Zone[]] -> ValInst[] }
 171   /// @see computeKnown()
 172   isl::union_map Known;
 173
 174   /// Translator for newly introduced ValInsts to already existing ValInsts such
 175   /// that new introduced load instructions can reuse the Known analysis of its
 176   /// original load. { ValInst[] -> ValInst[] }
 177   isl::union_map Translator;
 178
 179   /// Get list of array elements that do contain the same ValInst[] at Domain[].
 180   ///
 181   /// @param ValInst { Domain[] -> ValInst[] }
 182   ///                The values for which we search for alternative locations,
 183   ///                per statement instance.
 184   ///
 185   /// @return { Domain[] -> Element[] }
 186   ///         For each statement instance, the array elements that contain the
 187   ///         same ValInst.
 188   isl::union_map findSameContentElements(isl::union_map ValInst) {
 189     assert(!ValInst.is_single_valued().is_false());
 190
 191     // { Domain[] }
 192     isl::union_set Domain = ValInst.domain();
 193
 194     // { Domain[] -> Scatter[] }
 195     isl::union_map Schedule = getScatterFor(Domain);
 196
 197     // { Element[] -> [Scatter[] -> ValInst[]] }
 198     isl::union_map MustKnownCurried =
 199         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
 200
 201     // { [Domain[] -> ValInst[]] -> Scatter[] }
 202     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
 203
 204     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
 205     isl::union_map SchedValDomVal =
 206         DomValSched.range_product(ValInst.range_map()).reverse();
 207
 208     // { Element[] -> [Domain[] -> ValInst[]] }
 209     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
 210
 211     // { Domain[] -> Element[] }
 212     isl::union_map MustKnownMap =
 213         MustKnownInst.uncurry().domain().unwrap().reverse();
 214     simplify(MustKnownMap);
 215
 216     return MustKnownMap;
 217   }
 218
 219   /// Find a single array element for each statement instance, within a single
 220   /// array.
 221   ///
 222   /// @param MustKnown { Domain[] -> Element[] }
 223   ///                  Set of candidate array elements.
 224   /// @param Domain    { Domain[] }
 225   ///                  The statement instance for which we need elements for.
 226   ///
 227   /// @return { Domain[] -> Element[] }
 228   ///         For each statement instance, an array element out of @p MustKnown.
 229   ///         All array elements must be in the same array (Polly does not yet
 230   ///         support reading from different accesses using the same
 231   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
 232   ///         null.
 233   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
 234     // { Domain[] -> Element[] }
 235     isl::map Result;
 236
 237     // MemoryAccesses can read only elements from a single array
 238     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
 239     // Look through all spaces until we find one that contains at least the
 240     // wanted statement instance.s
 241     MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
 242       // Get the array this is accessing.
 243       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
 244       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
 245
 246       // No support for generation of indirect array accesses.
 247       if (SAI->getBasePtrOriginSAI())
 248         return isl::stat::ok; // continue
 249
 250       // Determine whether this map contains all wanted values.
 251       isl::set MapDom = Map.domain();
 252       if (!Domain.is_subset(MapDom).is_true())
 253         return isl::stat::ok; // continue
 254
 255       // There might be multiple array elements that contain the same value, but
 256       // choose only one of them. lexmin is used because it returns a one-value
 257       // mapping, we do not care about which one.
 258       // TODO: Get the simplest access function.
 259       Result = Map.lexmin();
 260       return isl::stat::error; // break
 261     });
 262
 263     return Result;
 264   }
 265
 266 public:
 267   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
 268       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
 269
 270   /// Compute the zones of known array element contents.
 271   ///
 272   /// @return True if the computed #Known is usable.
 273   bool computeKnownValues() {
 274     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
 275
 276     // Check that nothing strange occurs.
 277     collectCompatibleElts();
 278
 279     {
 280       IslQuotaScope QuotaScope = MaxOpGuard.enter();
 281
 282       computeCommon();
 283       Known = computeKnown(true, true);
 284
 285       // Preexisting ValInsts use the known content analysis of themselves.
 286       Translator = makeIdentityMap(Known.range(), false);
 287     }
 288
 289     if (!Known || !Translator) {
 290       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
 291       Known = nullptr;
 292       Translator = nullptr;
 293       DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
 294       return false;
 295     }
 296
 297     KnownAnalyzed++;
 298     DEBUG(dbgs() << "All known: " << Known << "\n");
 299
 300     return true;
 301   }
 302
 303   void printStatistics(raw_ostream &OS, int Indent = 0) {
 304     OS.indent(Indent) << "Statistics {\n";
 305     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
 306                           << '\n';
 307     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
 308                           << '\n';
 309         OS.indent(Indent + 4) << "Reloads: " << NumReloads<< '\n';
 310     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
 311                           << '\n';
 312     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
 313                           << '\n';
 314     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
 315                           << NumModifiedStmts << '\n';
 316     OS.indent(Indent) << "}\n";
 317   }
 318
 319   void printStatements(raw_ostream &OS, int Indent = 0) const {
 320     OS.indent(Indent) << "After statements {\n";
 321     for (auto &Stmt : *S) {
 322       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
 323       for (auto *MA : Stmt)
 324         MA->print(OS);
 325
 326       OS.indent(Indent + 12);
 327       Stmt.printInstructions(OS);
 328     }
 329     OS.indent(Indent) << "}\n";
 330   }
 331
 332   /// Create a new MemoryAccess of type read and MemoryKind::Array.
 333   ///
 334   /// @param Stmt           The statement in which the access occurs.
 335   /// @param LI             The instruction that does the access.
 336   /// @param AccessRelation The array element that each statement instance
 337   ///                       accesses.
 338   ///
 339   /// @param The newly created access.
 340   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
 341                                     isl::map AccessRelation) {
 342     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
 343     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
 344
 345     // Create a dummy SCEV access, to be replaced anyway.
 346     SmallVector<const SCEV *, 4> Sizes;
 347     Sizes.reserve(SAI->getNumberOfDimensions());
 348     SmallVector<const SCEV *, 4> Subscripts;
 349     Subscripts.reserve(SAI->getNumberOfDimensions());
 350     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
 351       Sizes.push_back(SAI->getDimensionSize(i));
 352       Subscripts.push_back(nullptr);
 353     }
 354
 355     MemoryAccess *Access =
 356         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
 357                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
 358     S->addAccessFunction(Access);
 359     Stmt->addAccess(Access, true);
 360
 361     Access->setNewAccessRelation(AccessRelation);
 362
 363     return Access;
 364   }
 365
 366   /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
 367   /// use in every instance of @p UseStmt.
 368   ///
 369   /// @param UseStmt Statement a scalar is used in.
 370   /// @param DefStmt Statement a scalar is defined in.
 371   ///
 372   /// @return { DomainUse[] -> DomainDef[] }
 373   isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
 374     // { DomainUse[] -> Scatter[] }
 375     isl::map UseScatter = getScatterFor(UseStmt);
 376
 377     // { Zone[] -> DomainDef[] }
 378     isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
 379
 380     // { Scatter[] -> DomainDef[] }
 381     isl::map ReachDefTimepoints =
 382         convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
 383
 384     // { DomainUse[] -> DomainDef[] }
 385     return UseScatter.apply_range(ReachDefTimepoints);
 386   }
 387
 388   /// Forward a load by reading from an array element that contains the same
 389   /// value. Typically the location it was loaded from.
 390   ///
 391   /// @param TargetStmt  The statement the operand tree will be copied to.
 392   /// @param Inst        The (possibly speculatable) instruction to forward.
 393   /// @param UseStmt     The statement that uses @p Inst.
 394   /// @param UseLoop     The loop @p Inst is used in.
 395   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
 396   ///                    A mapping from the statement instance @p Inst is used
 397   ///                    to the statement instance it is forwarded to.
 398   /// @param DefStmt     The statement @p Inst is defined in.
 399   /// @param DefLoop     The loop which contains @p Inst.
 400   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
 401   ///                    A mapping from the statement instance @p Inst is
 402   ///                    defined to the statement instance it is forwarded to.
 403   /// @param DoIt        If false, only determine whether an operand tree can be
 404   ///                    forwarded. If true, carry out the forwarding. Do not
 405   ///                    use DoIt==true if an operand tree is not known to be
 406   ///                    forwardable.
 407   ///
 408   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
 409   ///                           load.
 410   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
 411   ///         FD_CanForwardProfitably if @p Inst is forwardable.
 412   ///         FD_DidForwardTree if @p DoIt was true.
 413   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
 414                                       ScopStmt *UseStmt, Loop *UseLoop,
 415                                       isl::map UseToTarget, ScopStmt *DefStmt,
 416                                       Loop *DefLoop, isl::map DefToTarget,
 417                                       bool DoIt) {
 418     // Cannot do anything without successful known analysis.
 419     if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
 420         DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
 421       return FD_NotApplicable;
 422
 423     LoadInst *LI = dyn_cast<LoadInst>(Inst);
 424     if (!LI)
 425       return FD_NotApplicable;
 426
 427     // If the load is already in the statement, no forwarding is necessary.
 428     // However, it might happen that the LoadInst is already present in the
 429     // statement's instruction list. In that case we do as follows:
 430     // - For the evaluation (DoIt==false), we can trivially forward it as it is
 431     //   benefit of forwarding an already present instruction.
 432     // - For the execution (DoIt==true), prepend the instruction (to make it
 433     //   available to all instructions following in the instruction list), but
 434     //   do not add another MemoryAccess.
 435     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
 436     if (Access && !DoIt)
 437       return FD_CanForwardProfitably;
 438
 439     ForwardingDecision OpDecision =
 440         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
 441                     DefToTarget, DoIt);
 442     switch (OpDecision) {
 443     case FD_CannotForward:
 444       assert(!DoIt);
 445       return OpDecision;
 446
 447     case FD_CanForwardLeaf:
 448     case FD_CanForwardProfitably:
 449       assert(!DoIt);
 450       break;
 451
 452     case FD_DidForwardLeaf:
 453     case FD_DidForwardTree:
 454       assert(DoIt);
 455       break;
 456
 457     default:
 458       llvm_unreachable("Shouldn't return this");
 459     }
 460
 461     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
 462
 463     // { DomainDef[] -> ValInst[] }
 464     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
 465
 466     // { DomainTarget[] -> ValInst[] }
 467     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
 468     isl::union_map TranslatedExpectedVal =
 469         isl::union_map(TargetExpectedVal).apply_range(Translator);
 470
 471     // { DomainTarget[] -> Element[] }
 472     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
 473
 474     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
 475     if (!SameVal)
 476       return FD_NotApplicable;
 477
 478     if (DoIt)
 479       TargetStmt->prependInstruction(LI);
 480
 481     if (!DoIt)
 482       return FD_CanForwardProfitably;
 483
 484     if (Access) {
 485       DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
 486                    << Access << "\n");
 487     } else {
 488       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
 489       DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
 490                    << "\n");
 491
 492       // { ValInst[] }
 493       isl::space ValInstSpace = ExpectedVal.get_space().range();
 494
 495       // After adding a new load to the SCoP, also update the Known content
 496       // about it. The new load will have a known ValInst of
 497       // { [DomainTarget[] -> Value[]] }
 498       // but which -- because it is a copy of it -- has same value as the
 499       // { [DomainDef[] -> Value[]] }
 500       // that it replicates. Instead of  cloning the known content of
 501       // [DomainDef[] -> Value[]]
 502       // for DomainTarget[], we add a 'translator' that maps
 503       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
 504       // before comparing to the known content.
 505       // TODO: 'Translator' could also be used to map PHINodes to their incoming
 506       // ValInsts.
 507       if (ValInstSpace.is_wrapping()) {
 508         // { DefDomain[] -> Value[] }
 509         isl::map ValInsts = ExpectedVal.range().unwrap();
 510
 511         // { DefDomain[] }
 512         isl::set DefDomain = ValInsts.domain();
 513
 514         // { Value[] }
 515         isl::space ValSpace = ValInstSpace.unwrap().range();
 516
 517         // { Value[] -> Value[] }
 518         isl::map ValToVal =
 519             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
 520
 521         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
 522         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
 523
 524         Translator = Translator.add_map(LocalTranslator);
 525         DEBUG(dbgs() << "      local translator is " << LocalTranslator
 526                      << "\n");
 527       }
 528     }
 529     DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
 530                  << "\n");
 531     DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
 532     assert(Access);
 533
 534     NumKnownLoadsForwarded++;
 535     TotalKnownLoadsForwarded++;
 536     return FD_DidForwardTree;
 537   }
 538
 539   /// Forward a scalar by redirecting the access to an array element that stores
 540   /// the same value.
 541   ///
 542   /// @param TargetStmt  The statement the operand tree will be copied to.
 543   /// @param Inst        The scalar to forward.
 544   /// @param UseStmt     The statement that uses @p Inst.
 545   /// @param UseLoop     The loop @p Inst is used in.
 546   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
 547   ///                    A mapping from the statement instance @p Inst is used
 548   ///                    in, to the statement instance it is forwarded to.
 549   /// @param DefStmt     The statement @p Inst is defined in.
 550   /// @param DefLoop     The loop which contains @p Inst.
 551   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
 552   ///                    A mapping from the statement instance @p Inst is
 553   ///                    defined in, to the statement instance it is forwarded
 554   ///                    to.
 555   /// @param DoIt        If false, only determine whether an operand tree can be
 556   ///                    forwarded. If true, carry out the forwarding. Do not
 557   ///                    use DoIt==true if an operand tree is not known to be
 558   ///                    forwardable.
 559   ///
 560   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
 561   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
 562   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
 563   ///         FD_DidForwardLeaf       if @p DoIt was true.
 564   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
 565                                         ScopStmt *UseStmt, Loop *UseLoop,
 566                                         isl::map UseToTarget, ScopStmt *DefStmt,
 567                                         Loop *DefLoop, isl::map DefToTarget,
 568                                         bool DoIt) {
 569     // Cannot do anything without successful known analysis.
 570     if (Known.is_null())
 571       return FD_NotApplicable;
 572
 573     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
 574     if (Access && Access->isLatestArrayKind()) {
 575       if (DoIt)
 576         return FD_DidForwardLeaf;
 577       return FD_CanForwardLeaf;
 578     }
 579
 580     // { DomainDef[] -> ValInst[] }
 581     isl::union_map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
 582
 583     // { DomainTarget[] -> ValInst[] }
 584     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
 585     isl::union_map TranslatedExpectedVal =
 586         TargetExpectedVal.apply_range(Translator);
 587
 588     // { DomainTarget[] -> Element[] }
 589     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
 590
 591     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
 592     if (!SameVal)
 593       return FD_NotApplicable;
 594
 595     if (!DoIt)
 596       return FD_CanForwardProfitably;
 597
 598     if (!Access)
 599       Access = TargetStmt->ensureValueRead(Inst);
 600
 601     simplify(SameVal);
 602     Access->setNewAccessRelation(SameVal);
 603
 604     TotalReloads++;
 605     NumReloads++;
 606     return FD_DidForwardLeaf;
 607   }
 608
 609   /// Forwards a speculatively executable instruction.
 610   ///
 611   /// @param TargetStmt  The statement the operand tree will be copied to.
 612   /// @param UseInst     The (possibly speculatable) instruction to forward.
 613   /// @param DefStmt     The statement @p UseInst is defined in.
 614   /// @param DefLoop     The loop which contains @p UseInst.
 615   /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
 616   ///                    A mapping from the statement instance @p UseInst is
 617   ///                    defined to the statement instance it is forwarded to.
 618   /// @param DoIt        If false, only determine whether an operand tree can be
 619   ///                    forwarded. If true, carry out the forwarding. Do not
 620   ///                    use DoIt==true if an operand tree is not known to be
 621   ///                    forwardable.
 622   ///
 623   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
 624   ///         FD_CannotForward  if one of @p UseInst's operands is not
 625   ///                           forwardable.
 626   ///         FD_CanForwardTree if @p UseInst is forwardable.
 627   ///         FD_DidForward     if @p DoIt was true.
 628   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
 629                                          Instruction *UseInst,
 630                                          ScopStmt *DefStmt, Loop *DefLoop,
 631                                          isl::map DefToTarget, bool DoIt) {
 632     // PHIs, unless synthesizable, are not yet supported.
 633     if (isa<PHINode>(UseInst))
 634       return FD_NotApplicable;
 635
 636     // Compatible instructions must satisfy the following conditions:
 637     // 1. Idempotent (instruction will be copied, not moved; although its
 638     //    original instance might be removed by simplification)
 639     // 2. Not access memory (There might be memory writes between)
 640     // 3. Not cause undefined behaviour (we might copy to a location when the
 641     //    original instruction was no executed; this is currently not possible
 642     //    because we do not forward PHINodes)
 643     // 4. Not leak memory if executed multiple times (i.e. malloc)
 644     //
 645     // Instruction::mayHaveSideEffects is not sufficient because it considers
 646     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
 647     // not sufficient because it allows memory accesses.
 648     if (mayBeMemoryDependent(*UseInst))
 649       return FD_NotApplicable;
 650
 651     if (DoIt) {
 652       // To ensure the right order, prepend this instruction before its
 653       // operands. This ensures that its operands are inserted before the
 654       // instruction using them.
 655       // TODO: The operand tree is not really a tree, but a DAG. We should be
 656       // able to handle DAGs without duplication.
 657       TargetStmt->prependInstruction(UseInst);
 658       NumInstructionsCopied++;
 659       TotalInstructionsCopied++;
 660     }
 661
 662     for (Value *OpVal : UseInst->operand_values()) {
 663       ForwardingDecision OpDecision =
 664           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
 665       switch (OpDecision) {
 666       case FD_CannotForward:
 667         assert(!DoIt);
 668         return FD_CannotForward;
 669
 670       case FD_CanForwardLeaf:
 671       case FD_CanForwardProfitably:
 672         assert(!DoIt);
 673         break;
 674
 675       case FD_DidForwardLeaf:
 676       case FD_DidForwardTree:
 677         assert(DoIt);
 678         break;
 679
 680       case FD_NotApplicable:
 681         llvm_unreachable("forwardTree should never return FD_NotApplicable");
 682       }
 683     }
 684
 685     if (DoIt)
 686       return FD_DidForwardTree;
 687     return FD_CanForwardProfitably;
 688   }
 689
 690   /// Determines whether an operand tree can be forwarded or carries out a
 691   /// forwarding, depending on the @p DoIt flag.
 692   ///
 693   /// @param TargetStmt  The statement the operand tree will be copied to.
 694   /// @param UseVal      The value (usually an instruction) which is root of an
 695   ///                    operand tree.
 696   /// @param UseStmt     The statement that uses @p UseVal.
 697   /// @param UseLoop     The loop @p UseVal is used in.
 698   /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
 699   ///                    A mapping from the statement instance @p UseVal is used
 700   ///                    to the statement instance it is forwarded to.
 701   /// @param DoIt        If false, only determine whether an operand tree can be
 702   ///                    forwarded. If true, carry out the forwarding. Do not
 703   ///                    use DoIt==true if an operand tree is not known to be
 704   ///                    forwardable.
 705   ///
 706   /// @return If DoIt==false, return whether the operand tree can be forwarded.
 707   ///         If DoIt==true, return FD_DidForward.
 708   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
 709                                  ScopStmt *UseStmt, Loop *UseLoop,
 710                                  isl::map UseToTarget, bool DoIt) {
 711     ScopStmt *DefStmt = nullptr;
 712     Loop *DefLoop = nullptr;
 713
 714     // { DefDomain[] -> TargetDomain[] }
 715     isl::map DefToTarget;
 716
 717     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
 718     switch (VUse.getKind()) {
 719     case VirtualUse::Constant:
 720     case VirtualUse::Block:
 721     case VirtualUse::Hoisted:
 722       // These can be used anywhere without special considerations.
 723       if (DoIt)
 724         return FD_DidForwardTree;
 725       return FD_CanForwardLeaf;
 726
 727     case VirtualUse::Synthesizable: {
 728       // ScopExpander will take care for of generating the code at the new
 729       // location.
 730       if (DoIt)
 731         return FD_DidForwardTree;
 732
 733       // Check if the value is synthesizable at the new location as well. This
 734       // might be possible when leaving a loop for which ScalarEvolution is
 735       // unable to derive the exit value for.
 736       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
 737       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
 738       // do not forward past its loop header. This would require us to use a
 739       // previous loop induction variable instead the current one. We currently
 740       // do not allow forwarding PHI nodes, thus this should never occur (the
 741       // only exception where no phi is necessary being an unreachable loop
 742       // without edge from the outside).
 743       VirtualUse TargetUse = VirtualUse::create(
 744           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
 745       if (TargetUse.getKind() == VirtualUse::Synthesizable)
 746         return FD_CanForwardLeaf;
 747
 748       DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
 749                    << *UseVal << "\n");
 750       return FD_CannotForward;
 751     }
 752
 753     case VirtualUse::ReadOnly:
 754       // Note that we cannot return FD_CanForwardTree here. With a operand tree
 755       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
 756       // With -polly-analyze-read-only-scalars=true we would ensure the
 757       // existence of a MemoryAccess (which already exists for a leaf) and be
 758       // removed again by tryForwardTree because it's goal is to remove this
 759       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
 760       // to do so.
 761       if (!DoIt)
 762         return FD_CanForwardLeaf;
 763
 764       // If we model read-only scalars, we need to create a MemoryAccess for it.
 765       if (ModelReadOnlyScalars)
 766         TargetStmt->ensureValueRead(UseVal);
 767
 768       NumReadOnlyCopied++;
 769       TotalReadOnlyCopied++;
 770       return FD_DidForwardLeaf;
 771
 772     case VirtualUse::Intra:
 773       // Knowing that UseStmt and DefStmt are the same statement instance, just
 774       // reuse the information about UseStmt for DefStmt
 775       DefStmt = UseStmt;
 776       DefToTarget = UseToTarget;
 777
 778       LLVM_FALLTHROUGH;
 779     case VirtualUse::Inter:
 780       Instruction *Inst = cast<Instruction>(UseVal);
 781
 782       if (!DefStmt) {
 783         DefStmt = S->getStmtFor(Inst);
 784         if (!DefStmt)
 785           return FD_CannotForward;
 786       }
 787
 788       DefLoop = LI->getLoopFor(Inst->getParent());
 789
 790       if (DefToTarget.is_null() && !Known.is_null()) {
 791         IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
 792
 793         // { UseDomain[] -> DefDomain[] }
 794         isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
 795
 796         // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
 797         // { DefDomain[] -> TargetDomain[] }
 798         DefToTarget = UseToTarget.apply_domain(UseToDef);
 799         simplify(DefToTarget);
 800       }
 801
 802       ForwardingDecision SpeculativeResult = forwardSpeculatable(
 803           TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
 804       if (SpeculativeResult != FD_NotApplicable)
 805         return SpeculativeResult;
 806
 807       ForwardingDecision KnownResult =
 808           forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
 809                            DefStmt, DefLoop, DefToTarget, DoIt);
 810       if (KnownResult != FD_NotApplicable)
 811         return KnownResult;
 812
 813       ForwardingDecision ReloadResult =
 814           reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
 815                              DefStmt, DefLoop, DefToTarget, DoIt);
 816       if (ReloadResult != FD_NotApplicable)
 817         return ReloadResult;
 818
 819       // When no method is found to forward the operand tree, we effectively
 820       // cannot handle it.
 821       DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
 822       return FD_CannotForward;
 823     }
 824
 825     llvm_unreachable("Case unhandled");
 826   }
 827
 828   /// Try to forward an operand tree rooted in @p RA.
 829   bool tryForwardTree(MemoryAccess *RA) {
 830     assert(RA->isLatestScalarKind());
 831     DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
 832
 833     ScopStmt *Stmt = RA->getStatement();
 834     Loop *InLoop = Stmt->getSurroundingLoop();
 835
 836     isl::map TargetToUse;
 837     if (!Known.is_null()) {
 838       isl::space DomSpace = Stmt->getDomainSpace();
 839       TargetToUse =
 840           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
 841     }
 842
 843     ForwardingDecision Assessment = forwardTree(
 844         Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
 845     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
 846     if (Assessment != FD_CanForwardProfitably)
 847       return false;
 848
 849     ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
 850                                                InLoop, TargetToUse, true);
 851     assert(((Execution == FD_DidForwardTree) ||
 852             (Execution == FD_DidForwardLeaf)) &&
 853            "A previous positive assessment must also be executable");
 854
 855     if (Execution == FD_DidForwardTree)
 856       Stmt->removeSingleMemoryAccess(RA);
 857     return true;
 858   }
 859
 860   /// Return which SCoP this instance is processing.
 861   Scop *getScop() const { return S; }
 862
 863   /// Run the algorithm: Use value read accesses as operand tree roots and try
 864   /// to forward them into the statement.
 865   bool forwardOperandTrees() {
 866     for (ScopStmt &Stmt : *S) {
 867       bool StmtModified = false;
 868
 869       // Because we are modifying the MemoryAccess list, collect them first to
 870       // avoid iterator invalidation.
 871       SmallVector<MemoryAccess *, 16> Accs;
 872       for (MemoryAccess *RA : Stmt) {
 873         if (!RA->isRead())
 874           continue;
 875         if (!RA->isLatestScalarKind())
 876           continue;
 877
 878         Accs.push_back(RA);
 879       }
 880
 881       for (MemoryAccess *RA : Accs) {
 882         if (tryForwardTree(RA)) {
 883           Modified = true;
 884           StmtModified = true;
 885           NumForwardedTrees++;
 886           TotalForwardedTrees++;
 887         }
 888       }
 889
 890       if (StmtModified) {
 891         NumModifiedStmts++;
 892         TotalModifiedStmts++;
 893       }
 894     }
 895
 896     if (Modified)
 897       ScopsModified++;
 898     return Modified;
 899   }
 900
 901   /// Print the pass result, performed transformations and the SCoP after the
 902   /// transformation.
 903   void print(raw_ostream &OS, int Indent = 0) {
 904     printStatistics(OS, Indent);
 905
 906     if (!Modified) {
 907       // This line can easily be checked in regression tests.
 908       OS << "ForwardOpTree executed, but did not modify anything\n";
 909       return;
 910     }
 911
 912     printStatements(OS, Indent);
 913   }
 914 };
 915
 916 /// Pass that redirects scalar reads to array elements that are known to contain
 917 /// the same value.
 918 ///
 919 /// This reduces the number of scalar accesses and therefore potentially
 920 /// increases the freedom of the scheduler. In the ideal case, all reads of a
 921 /// scalar definition are redirected (We currently do not care about removing
 922 /// the write in this case).  This is also useful for the main DeLICM pass as
 923 /// there are less scalars to be mapped.
 924 class ForwardOpTree : public ScopPass {
 925 private:
 926   /// The pass implementation, also holding per-scop data.
 927   std::unique_ptr<ForwardOpTreeImpl> Impl;
 928
 929 public:
 930   static char ID;
 931
 932   explicit ForwardOpTree() : ScopPass(ID) {}
 933   ForwardOpTree(const ForwardOpTree &) = delete;
 934   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
 935
 936   void getAnalysisUsage(AnalysisUsage &AU) const override {
 937     AU.addRequiredTransitive<ScopInfoRegionPass>();
 938     AU.addRequired<LoopInfoWrapperPass>();
 939     AU.setPreservesAll();
 940   }
 941
 942   bool runOnScop(Scop &S) override {
 943     // Free resources for previous SCoP's computation, if not yet done.
 944     releaseMemory();
 945
 946     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 947
 948     {
 949       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx(), MaxOps, false);
 950       Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
 951
 952       if (AnalyzeKnown) {
 953         DEBUG(dbgs() << "Prepare forwarders...\n");
 954         Impl->computeKnownValues();
 955       }
 956
 957       DEBUG(dbgs() << "Forwarding operand trees...\n");
 958       Impl->forwardOperandTrees();
 959
 960       if (MaxOpGuard.hasQuotaExceeded()) {
 961         DEBUG(dbgs() << "Not all operations completed because of "
 962                         "max_operations exceeded\n");
 963         KnownOutOfQuota++;
 964       }
 965     }
 966
 967     DEBUG(dbgs() << "\nFinal Scop:\n");
 968     DEBUG(dbgs() << S);
 969
 970     // Update statistics
 971     auto ScopStats = S.getStatistics();
 972     NumValueWrites += ScopStats.NumValueWrites;
 973     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
 974     NumPHIWrites += ScopStats.NumPHIWrites;
 975     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
 976     NumSingletonWrites += ScopStats.NumSingletonWrites;
 977     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
 978
 979     return false;
 980   }
 981
 982   void printScop(raw_ostream &OS, Scop &S) const override {
 983     if (!Impl)
 984       return;
 985
 986     assert(Impl->getScop() == &S);
 987     Impl->print(OS);
 988   }
 989
 990   void releaseMemory() override { Impl.reset(); }
 991 }; // class ForwardOpTree
 992
 993 char ForwardOpTree::ID;
 994
 995 } // namespace
 996
 997 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
 998
 999 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1000                       "Polly - Forward operand tree", false, false)
1001 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1002 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1003                     "Polly - Forward operand tree", false, false)