[ForwardOpTree] Reload know values.
[polly-mirror.git] / lib / Transform / ForwardOpTree.cpp
blob7fc0d5a3774e1ad26055769aefe66de24e412c32
1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
12 //===----------------------------------------------------------------------===//
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
44 #define DEBUG_TYPE "polly-optree"
46 using namespace llvm;
47 using namespace polly;
49 static cl::opt<bool>
50 AnalyzeKnown("polly-optree-analyze-known",
51 cl::desc("Analyze array contents for load forwarding"),
52 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
54 static cl::opt<unsigned>
55 MaxOps("polly-optree-max-ops",
56 cl::desc("Maximum number of ISL operations to invest for known "
57 "analysis; 0=no limit"),
58 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
61 STATISTIC(KnownOutOfQuota,
62 "Analyses aborted because max_operations was reached");
64 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
65 STATISTIC(TotalKnownLoadsForwarded,
66 "Number of forwarded loads because their value was known");
67 STATISTIC(TotalReloads, "Number of reloaded values");
68 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
69 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
70 STATISTIC(TotalModifiedStmts,
71 "Number of statements with at least one forwarded tree");
73 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
75 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
76 STATISTIC(NumValueWritesInLoops,
77 "Number of scalar value writes nested in affine loops after OpTree");
78 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
79 STATISTIC(NumPHIWritesInLoops,
80 "Number of scalar phi writes nested in affine loops after OpTree");
81 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
82 STATISTIC(NumSingletonWritesInLoops,
83 "Number of singleton writes nested in affine loops after OpTree");
85 namespace {
87 /// The state of whether an operand tree was/can be forwarded.
88 ///
89 /// The items apply to an instructions and its operand tree with the instruction
90 /// as the root element. If the value in question is not an instruction in the
91 /// SCoP, it can be a leaf of an instruction's operand tree.
92 enum ForwardingDecision {
93 /// The root instruction or value cannot be forwarded at all.
94 FD_CannotForward,
96 /// The root instruction or value can be forwarded as a leaf of a larger
97 /// operand tree.
98 /// It does not make sense to move the value itself, it would just replace it
99 /// by a use of itself. For instance, a constant "5" used in a statement can
100 /// be forwarded, but it would just replace it by the same constant "5".
101 /// However, it makes sense to move as an operand of
103 /// %add = add 5, 5
105 /// where "5" is moved as part of a larger operand tree. "5" would be placed
106 /// (disregarding for a moment that literal constants don't have a location
107 /// and can be used anywhere) into the same statement as %add would.
108 FD_CanForwardLeaf,
110 /// The root instruction can be forwarded and doing so avoids a scalar
111 /// dependency.
113 /// This can be either because the operand tree can be moved to the target
114 /// statement, or a memory access is redirected to read from a different
115 /// location.
116 FD_CanForwardProfitably,
118 /// Used to indicate that a forwarding has be carried out successfully, and
119 /// the forwarded memory access can be deleted.
120 FD_DidForwardTree,
122 /// Used to indicate that a forwarding has be carried out successfully, and
123 /// the forwarded memory access is being reused.
124 FD_DidForwardLeaf,
126 /// A forwarding method cannot be applied to the operand tree.
127 /// The difference to FD_CannotForward is that there might be other methods
128 /// that can handle it.
129 /// The conditions that make an operand tree applicable must be checked even
130 /// with DoIt==true because a method following the one that returned
131 /// FD_NotApplicable might have returned FD_CanForwardTree.
132 FD_NotApplicable
135 /// Implementation of operand tree forwarding for a specific SCoP.
137 /// For a statement that requires a scalar value (through a value read
138 /// MemoryAccess), see if its operand can be moved into the statement. If so,
139 /// the MemoryAccess is removed and the all the operand tree instructions are
140 /// moved into the statement. All original instructions are left in the source
141 /// statements. The simplification pass can clean these up.
142 class ForwardOpTreeImpl : ZoneAlgorithm {
143 private:
144 /// Scope guard to limit the number of isl operations for this pass.
145 IslMaxOperationsGuard &MaxOpGuard;
147 /// How many instructions have been copied to other statements.
148 int NumInstructionsCopied = 0;
150 /// Number of loads forwarded because their value was known.
151 int NumKnownLoadsForwarded = 0;
153 /// Number of values reloaded from known array elements.
154 int NumReloads = 0;
156 /// How many read-only accesses have been copied.
157 int NumReadOnlyCopied = 0;
159 /// How many operand trees have been forwarded.
160 int NumForwardedTrees = 0;
162 /// Number of statements with at least one forwarded operand tree.
163 int NumModifiedStmts = 0;
165 /// Whether we carried out at least one change to the SCoP.
166 bool Modified = false;
168 /// Contains the zones where array elements are known to contain a specific
169 /// value.
170 /// { [Element[] -> Zone[]] -> ValInst[] }
171 /// @see computeKnown()
172 isl::union_map Known;
174 /// Translator for newly introduced ValInsts to already existing ValInsts such
175 /// that new introduced load instructions can reuse the Known analysis of its
176 /// original load. { ValInst[] -> ValInst[] }
177 isl::union_map Translator;
179 /// Get list of array elements that do contain the same ValInst[] at Domain[].
181 /// @param ValInst { Domain[] -> ValInst[] }
182 /// The values for which we search for alternative locations,
183 /// per statement instance.
185 /// @return { Domain[] -> Element[] }
186 /// For each statement instance, the array elements that contain the
187 /// same ValInst.
188 isl::union_map findSameContentElements(isl::union_map ValInst) {
189 assert(!ValInst.is_single_valued().is_false());
191 // { Domain[] }
192 isl::union_set Domain = ValInst.domain();
194 // { Domain[] -> Scatter[] }
195 isl::union_map Schedule = getScatterFor(Domain);
197 // { Element[] -> [Scatter[] -> ValInst[]] }
198 isl::union_map MustKnownCurried =
199 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
201 // { [Domain[] -> ValInst[]] -> Scatter[] }
202 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
204 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
205 isl::union_map SchedValDomVal =
206 DomValSched.range_product(ValInst.range_map()).reverse();
208 // { Element[] -> [Domain[] -> ValInst[]] }
209 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
211 // { Domain[] -> Element[] }
212 isl::union_map MustKnownMap =
213 MustKnownInst.uncurry().domain().unwrap().reverse();
214 simplify(MustKnownMap);
216 return MustKnownMap;
219 /// Find a single array element for each statement instance, within a single
220 /// array.
222 /// @param MustKnown { Domain[] -> Element[] }
223 /// Set of candidate array elements.
224 /// @param Domain { Domain[] }
225 /// The statement instance for which we need elements for.
227 /// @return { Domain[] -> Element[] }
228 /// For each statement instance, an array element out of @p MustKnown.
229 /// All array elements must be in the same array (Polly does not yet
230 /// support reading from different accesses using the same
231 /// MemoryAccess). If no mapping for all of @p Domain exists, returns
232 /// null.
233 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
234 // { Domain[] -> Element[] }
235 isl::map Result;
237 // MemoryAccesses can read only elements from a single array
238 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
239 // Look through all spaces until we find one that contains at least the
240 // wanted statement instance.s
241 MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
242 // Get the array this is accessing.
243 isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
244 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
246 // No support for generation of indirect array accesses.
247 if (SAI->getBasePtrOriginSAI())
248 return isl::stat::ok; // continue
250 // Determine whether this map contains all wanted values.
251 isl::set MapDom = Map.domain();
252 if (!Domain.is_subset(MapDom).is_true())
253 return isl::stat::ok; // continue
255 // There might be multiple array elements that contain the same value, but
256 // choose only one of them. lexmin is used because it returns a one-value
257 // mapping, we do not care about which one.
258 // TODO: Get the simplest access function.
259 Result = Map.lexmin();
260 return isl::stat::error; // break
263 return Result;
266 public:
267 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
268 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
270 /// Compute the zones of known array element contents.
272 /// @return True if the computed #Known is usable.
273 bool computeKnownValues() {
274 isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
276 // Check that nothing strange occurs.
277 collectCompatibleElts();
280 IslQuotaScope QuotaScope = MaxOpGuard.enter();
282 computeCommon();
283 Known = computeKnown(true, true);
285 // Preexisting ValInsts use the known content analysis of themselves.
286 Translator = makeIdentityMap(Known.range(), false);
289 if (!Known || !Translator) {
290 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
291 Known = nullptr;
292 Translator = nullptr;
293 DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
294 return false;
297 KnownAnalyzed++;
298 DEBUG(dbgs() << "All known: " << Known << "\n");
300 return true;
303 void printStatistics(raw_ostream &OS, int Indent = 0) {
304 OS.indent(Indent) << "Statistics {\n";
305 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
306 << '\n';
307 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
308 << '\n';
309 OS.indent(Indent + 4) << "Reloads: " << NumReloads<< '\n';
310 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
311 << '\n';
312 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
313 << '\n';
314 OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
315 << NumModifiedStmts << '\n';
316 OS.indent(Indent) << "}\n";
319 void printStatements(raw_ostream &OS, int Indent = 0) const {
320 OS.indent(Indent) << "After statements {\n";
321 for (auto &Stmt : *S) {
322 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
323 for (auto *MA : Stmt)
324 MA->print(OS);
326 OS.indent(Indent + 12);
327 Stmt.printInstructions(OS);
329 OS.indent(Indent) << "}\n";
332 /// Create a new MemoryAccess of type read and MemoryKind::Array.
334 /// @param Stmt The statement in which the access occurs.
335 /// @param LI The instruction that does the access.
336 /// @param AccessRelation The array element that each statement instance
337 /// accesses.
339 /// @param The newly created access.
340 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
341 isl::map AccessRelation) {
342 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
343 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
345 // Create a dummy SCEV access, to be replaced anyway.
346 SmallVector<const SCEV *, 4> Sizes;
347 Sizes.reserve(SAI->getNumberOfDimensions());
348 SmallVector<const SCEV *, 4> Subscripts;
349 Subscripts.reserve(SAI->getNumberOfDimensions());
350 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
351 Sizes.push_back(SAI->getDimensionSize(i));
352 Subscripts.push_back(nullptr);
355 MemoryAccess *Access =
356 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
357 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
358 S->addAccessFunction(Access);
359 Stmt->addAccess(Access, true);
361 Access->setNewAccessRelation(AccessRelation);
363 return Access;
366 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
367 /// use in every instance of @p UseStmt.
369 /// @param UseStmt Statement a scalar is used in.
370 /// @param DefStmt Statement a scalar is defined in.
372 /// @return { DomainUse[] -> DomainDef[] }
373 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
374 // { DomainUse[] -> Scatter[] }
375 isl::map UseScatter = getScatterFor(UseStmt);
377 // { Zone[] -> DomainDef[] }
378 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
380 // { Scatter[] -> DomainDef[] }
381 isl::map ReachDefTimepoints =
382 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
384 // { DomainUse[] -> DomainDef[] }
385 return UseScatter.apply_range(ReachDefTimepoints);
388 /// Forward a load by reading from an array element that contains the same
389 /// value. Typically the location it was loaded from.
391 /// @param TargetStmt The statement the operand tree will be copied to.
392 /// @param Inst The (possibly speculatable) instruction to forward.
393 /// @param UseStmt The statement that uses @p Inst.
394 /// @param UseLoop The loop @p Inst is used in.
395 /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
396 /// A mapping from the statement instance @p Inst is used
397 /// to the statement instance it is forwarded to.
398 /// @param DefStmt The statement @p Inst is defined in.
399 /// @param DefLoop The loop which contains @p Inst.
400 /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
401 /// A mapping from the statement instance @p Inst is
402 /// defined to the statement instance it is forwarded to.
403 /// @param DoIt If false, only determine whether an operand tree can be
404 /// forwarded. If true, carry out the forwarding. Do not
405 /// use DoIt==true if an operand tree is not known to be
406 /// forwardable.
408 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new
409 /// load.
410 /// FD_CannotForward if the pointer operand cannot be forwarded.
411 /// FD_CanForwardProfitably if @p Inst is forwardable.
412 /// FD_DidForwardTree if @p DoIt was true.
413 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
414 ScopStmt *UseStmt, Loop *UseLoop,
415 isl::map UseToTarget, ScopStmt *DefStmt,
416 Loop *DefLoop, isl::map DefToTarget,
417 bool DoIt) {
418 // Cannot do anything without successful known analysis.
419 if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
420 DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
421 return FD_NotApplicable;
423 LoadInst *LI = dyn_cast<LoadInst>(Inst);
424 if (!LI)
425 return FD_NotApplicable;
427 // If the load is already in the statement, no forwarding is necessary.
428 // However, it might happen that the LoadInst is already present in the
429 // statement's instruction list. In that case we do as follows:
430 // - For the evaluation (DoIt==false), we can trivially forward it as it is
431 // benefit of forwarding an already present instruction.
432 // - For the execution (DoIt==true), prepend the instruction (to make it
433 // available to all instructions following in the instruction list), but
434 // do not add another MemoryAccess.
435 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
436 if (Access && !DoIt)
437 return FD_CanForwardProfitably;
439 ForwardingDecision OpDecision =
440 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
441 DefToTarget, DoIt);
442 switch (OpDecision) {
443 case FD_CannotForward:
444 assert(!DoIt);
445 return OpDecision;
447 case FD_CanForwardLeaf:
448 case FD_CanForwardProfitably:
449 assert(!DoIt);
450 break;
452 case FD_DidForwardLeaf:
453 case FD_DidForwardTree:
454 assert(DoIt);
455 break;
457 default:
458 llvm_unreachable("Shouldn't return this");
461 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
463 // { DomainDef[] -> ValInst[] }
464 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
466 // { DomainTarget[] -> ValInst[] }
467 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
468 isl::union_map TranslatedExpectedVal =
469 isl::union_map(TargetExpectedVal).apply_range(Translator);
471 // { DomainTarget[] -> Element[] }
472 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
474 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
475 if (!SameVal)
476 return FD_NotApplicable;
478 if (DoIt)
479 TargetStmt->prependInstruction(LI);
481 if (!DoIt)
482 return FD_CanForwardProfitably;
484 if (Access) {
485 DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess"
486 << Access << "\n");
487 } else {
488 Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
489 DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access
490 << "\n");
492 // { ValInst[] }
493 isl::space ValInstSpace = ExpectedVal.get_space().range();
495 // After adding a new load to the SCoP, also update the Known content
496 // about it. The new load will have a known ValInst of
497 // { [DomainTarget[] -> Value[]] }
498 // but which -- because it is a copy of it -- has same value as the
499 // { [DomainDef[] -> Value[]] }
500 // that it replicates. Instead of cloning the known content of
501 // [DomainDef[] -> Value[]]
502 // for DomainTarget[], we add a 'translator' that maps
503 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
504 // before comparing to the known content.
505 // TODO: 'Translator' could also be used to map PHINodes to their incoming
506 // ValInsts.
507 if (ValInstSpace.is_wrapping()) {
508 // { DefDomain[] -> Value[] }
509 isl::map ValInsts = ExpectedVal.range().unwrap();
511 // { DefDomain[] }
512 isl::set DefDomain = ValInsts.domain();
514 // { Value[] }
515 isl::space ValSpace = ValInstSpace.unwrap().range();
517 // { Value[] -> Value[] }
518 isl::map ValToVal =
519 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
521 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
522 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
524 Translator = Translator.add_map(LocalTranslator);
525 DEBUG(dbgs() << " local translator is " << LocalTranslator
526 << "\n");
529 DEBUG(dbgs() << " expected values where " << TargetExpectedVal
530 << "\n");
531 DEBUG(dbgs() << " candidate elements where " << Candidates << "\n");
532 assert(Access);
534 NumKnownLoadsForwarded++;
535 TotalKnownLoadsForwarded++;
536 return FD_DidForwardTree;
539 /// Forward a scalar by redirecting the access to an array element that stores
540 /// the same value.
542 /// @param TargetStmt The statement the operand tree will be copied to.
543 /// @param Inst The scalar to forward.
544 /// @param UseStmt The statement that uses @p Inst.
545 /// @param UseLoop The loop @p Inst is used in.
546 /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
547 /// A mapping from the statement instance @p Inst is used
548 /// in, to the statement instance it is forwarded to.
549 /// @param DefStmt The statement @p Inst is defined in.
550 /// @param DefLoop The loop which contains @p Inst.
551 /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
552 /// A mapping from the statement instance @p Inst is
553 /// defined in, to the statement instance it is forwarded
554 /// to.
555 /// @param DoIt If false, only determine whether an operand tree can be
556 /// forwarded. If true, carry out the forwarding. Do not
557 /// use DoIt==true if an operand tree is not known to be
558 /// forwardable.
560 /// @return FD_NotApplicable if @p Inst cannot be reloaded.
561 /// FD_CanForwardLeaf if @p Inst can be reloaded.
562 /// FD_CanForwardProfitably if @p Inst has been reloaded.
563 /// FD_DidForwardLeaf if @p DoIt was true.
564 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
565 ScopStmt *UseStmt, Loop *UseLoop,
566 isl::map UseToTarget, ScopStmt *DefStmt,
567 Loop *DefLoop, isl::map DefToTarget,
568 bool DoIt) {
569 // Cannot do anything without successful known analysis.
570 if (Known.is_null())
571 return FD_NotApplicable;
573 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
574 if (Access && Access->isLatestArrayKind()) {
575 if (DoIt)
576 return FD_DidForwardLeaf;
577 return FD_CanForwardLeaf;
580 // { DomainDef[] -> ValInst[] }
581 isl::union_map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
583 // { DomainTarget[] -> ValInst[] }
584 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
585 isl::union_map TranslatedExpectedVal =
586 TargetExpectedVal.apply_range(Translator);
588 // { DomainTarget[] -> Element[] }
589 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
591 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
592 if (!SameVal)
593 return FD_NotApplicable;
595 if (!DoIt)
596 return FD_CanForwardProfitably;
598 if (!Access)
599 Access = TargetStmt->ensureValueRead(Inst);
601 simplify(SameVal);
602 Access->setNewAccessRelation(SameVal);
604 TotalReloads++;
605 NumReloads++;
606 return FD_DidForwardLeaf;
609 /// Forwards a speculatively executable instruction.
611 /// @param TargetStmt The statement the operand tree will be copied to.
612 /// @param UseInst The (possibly speculatable) instruction to forward.
613 /// @param DefStmt The statement @p UseInst is defined in.
614 /// @param DefLoop The loop which contains @p UseInst.
615 /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
616 /// A mapping from the statement instance @p UseInst is
617 /// defined to the statement instance it is forwarded to.
618 /// @param DoIt If false, only determine whether an operand tree can be
619 /// forwarded. If true, carry out the forwarding. Do not
620 /// use DoIt==true if an operand tree is not known to be
621 /// forwardable.
623 /// @return FD_NotApplicable if @p UseInst is not speculatable.
624 /// FD_CannotForward if one of @p UseInst's operands is not
625 /// forwardable.
626 /// FD_CanForwardTree if @p UseInst is forwardable.
627 /// FD_DidForward if @p DoIt was true.
628 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
629 Instruction *UseInst,
630 ScopStmt *DefStmt, Loop *DefLoop,
631 isl::map DefToTarget, bool DoIt) {
632 // PHIs, unless synthesizable, are not yet supported.
633 if (isa<PHINode>(UseInst))
634 return FD_NotApplicable;
636 // Compatible instructions must satisfy the following conditions:
637 // 1. Idempotent (instruction will be copied, not moved; although its
638 // original instance might be removed by simplification)
639 // 2. Not access memory (There might be memory writes between)
640 // 3. Not cause undefined behaviour (we might copy to a location when the
641 // original instruction was no executed; this is currently not possible
642 // because we do not forward PHINodes)
643 // 4. Not leak memory if executed multiple times (i.e. malloc)
645 // Instruction::mayHaveSideEffects is not sufficient because it considers
646 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
647 // not sufficient because it allows memory accesses.
648 if (mayBeMemoryDependent(*UseInst))
649 return FD_NotApplicable;
651 if (DoIt) {
652 // To ensure the right order, prepend this instruction before its
653 // operands. This ensures that its operands are inserted before the
654 // instruction using them.
655 // TODO: The operand tree is not really a tree, but a DAG. We should be
656 // able to handle DAGs without duplication.
657 TargetStmt->prependInstruction(UseInst);
658 NumInstructionsCopied++;
659 TotalInstructionsCopied++;
662 for (Value *OpVal : UseInst->operand_values()) {
663 ForwardingDecision OpDecision =
664 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
665 switch (OpDecision) {
666 case FD_CannotForward:
667 assert(!DoIt);
668 return FD_CannotForward;
670 case FD_CanForwardLeaf:
671 case FD_CanForwardProfitably:
672 assert(!DoIt);
673 break;
675 case FD_DidForwardLeaf:
676 case FD_DidForwardTree:
677 assert(DoIt);
678 break;
680 case FD_NotApplicable:
681 llvm_unreachable("forwardTree should never return FD_NotApplicable");
685 if (DoIt)
686 return FD_DidForwardTree;
687 return FD_CanForwardProfitably;
690 /// Determines whether an operand tree can be forwarded or carries out a
691 /// forwarding, depending on the @p DoIt flag.
693 /// @param TargetStmt The statement the operand tree will be copied to.
694 /// @param UseVal The value (usually an instruction) which is root of an
695 /// operand tree.
696 /// @param UseStmt The statement that uses @p UseVal.
697 /// @param UseLoop The loop @p UseVal is used in.
698 /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
699 /// A mapping from the statement instance @p UseVal is used
700 /// to the statement instance it is forwarded to.
701 /// @param DoIt If false, only determine whether an operand tree can be
702 /// forwarded. If true, carry out the forwarding. Do not
703 /// use DoIt==true if an operand tree is not known to be
704 /// forwardable.
706 /// @return If DoIt==false, return whether the operand tree can be forwarded.
707 /// If DoIt==true, return FD_DidForward.
708 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
709 ScopStmt *UseStmt, Loop *UseLoop,
710 isl::map UseToTarget, bool DoIt) {
711 ScopStmt *DefStmt = nullptr;
712 Loop *DefLoop = nullptr;
714 // { DefDomain[] -> TargetDomain[] }
715 isl::map DefToTarget;
717 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
718 switch (VUse.getKind()) {
719 case VirtualUse::Constant:
720 case VirtualUse::Block:
721 case VirtualUse::Hoisted:
722 // These can be used anywhere without special considerations.
723 if (DoIt)
724 return FD_DidForwardTree;
725 return FD_CanForwardLeaf;
727 case VirtualUse::Synthesizable: {
728 // ScopExpander will take care for of generating the code at the new
729 // location.
730 if (DoIt)
731 return FD_DidForwardTree;
733 // Check if the value is synthesizable at the new location as well. This
734 // might be possible when leaving a loop for which ScalarEvolution is
735 // unable to derive the exit value for.
736 // TODO: If there is a LCSSA PHI at the loop exit, use that one.
737 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
738 // do not forward past its loop header. This would require us to use a
739 // previous loop induction variable instead the current one. We currently
740 // do not allow forwarding PHI nodes, thus this should never occur (the
741 // only exception where no phi is necessary being an unreachable loop
742 // without edge from the outside).
743 VirtualUse TargetUse = VirtualUse::create(
744 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
745 if (TargetUse.getKind() == VirtualUse::Synthesizable)
746 return FD_CanForwardLeaf;
748 DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: "
749 << *UseVal << "\n");
750 return FD_CannotForward;
753 case VirtualUse::ReadOnly:
754 // Note that we cannot return FD_CanForwardTree here. With a operand tree
755 // depth of 0, UseVal is the use in TargetStmt that we try to replace.
756 // With -polly-analyze-read-only-scalars=true we would ensure the
757 // existence of a MemoryAccess (which already exists for a leaf) and be
758 // removed again by tryForwardTree because it's goal is to remove this
759 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
760 // to do so.
761 if (!DoIt)
762 return FD_CanForwardLeaf;
764 // If we model read-only scalars, we need to create a MemoryAccess for it.
765 if (ModelReadOnlyScalars)
766 TargetStmt->ensureValueRead(UseVal);
768 NumReadOnlyCopied++;
769 TotalReadOnlyCopied++;
770 return FD_DidForwardLeaf;
772 case VirtualUse::Intra:
773 // Knowing that UseStmt and DefStmt are the same statement instance, just
774 // reuse the information about UseStmt for DefStmt
775 DefStmt = UseStmt;
776 DefToTarget = UseToTarget;
778 LLVM_FALLTHROUGH;
779 case VirtualUse::Inter:
780 Instruction *Inst = cast<Instruction>(UseVal);
782 if (!DefStmt) {
783 DefStmt = S->getStmtFor(Inst);
784 if (!DefStmt)
785 return FD_CannotForward;
788 DefLoop = LI->getLoopFor(Inst->getParent());
790 if (DefToTarget.is_null() && !Known.is_null()) {
791 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
793 // { UseDomain[] -> DefDomain[] }
794 isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
796 // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
797 // { DefDomain[] -> TargetDomain[] }
798 DefToTarget = UseToTarget.apply_domain(UseToDef);
799 simplify(DefToTarget);
802 ForwardingDecision SpeculativeResult = forwardSpeculatable(
803 TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
804 if (SpeculativeResult != FD_NotApplicable)
805 return SpeculativeResult;
807 ForwardingDecision KnownResult =
808 forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
809 DefStmt, DefLoop, DefToTarget, DoIt);
810 if (KnownResult != FD_NotApplicable)
811 return KnownResult;
813 ForwardingDecision ReloadResult =
814 reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
815 DefStmt, DefLoop, DefToTarget, DoIt);
816 if (ReloadResult != FD_NotApplicable)
817 return ReloadResult;
819 // When no method is found to forward the operand tree, we effectively
820 // cannot handle it.
821 DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
822 return FD_CannotForward;
825 llvm_unreachable("Case unhandled");
828 /// Try to forward an operand tree rooted in @p RA.
829 bool tryForwardTree(MemoryAccess *RA) {
830 assert(RA->isLatestScalarKind());
831 DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
833 ScopStmt *Stmt = RA->getStatement();
834 Loop *InLoop = Stmt->getSurroundingLoop();
836 isl::map TargetToUse;
837 if (!Known.is_null()) {
838 isl::space DomSpace = Stmt->getDomainSpace();
839 TargetToUse =
840 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
843 ForwardingDecision Assessment = forwardTree(
844 Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
845 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
846 if (Assessment != FD_CanForwardProfitably)
847 return false;
849 ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
850 InLoop, TargetToUse, true);
851 assert(((Execution == FD_DidForwardTree) ||
852 (Execution == FD_DidForwardLeaf)) &&
853 "A previous positive assessment must also be executable");
855 if (Execution == FD_DidForwardTree)
856 Stmt->removeSingleMemoryAccess(RA);
857 return true;
860 /// Return which SCoP this instance is processing.
861 Scop *getScop() const { return S; }
863 /// Run the algorithm: Use value read accesses as operand tree roots and try
864 /// to forward them into the statement.
865 bool forwardOperandTrees() {
866 for (ScopStmt &Stmt : *S) {
867 bool StmtModified = false;
869 // Because we are modifying the MemoryAccess list, collect them first to
870 // avoid iterator invalidation.
871 SmallVector<MemoryAccess *, 16> Accs;
872 for (MemoryAccess *RA : Stmt) {
873 if (!RA->isRead())
874 continue;
875 if (!RA->isLatestScalarKind())
876 continue;
878 Accs.push_back(RA);
881 for (MemoryAccess *RA : Accs) {
882 if (tryForwardTree(RA)) {
883 Modified = true;
884 StmtModified = true;
885 NumForwardedTrees++;
886 TotalForwardedTrees++;
890 if (StmtModified) {
891 NumModifiedStmts++;
892 TotalModifiedStmts++;
896 if (Modified)
897 ScopsModified++;
898 return Modified;
901 /// Print the pass result, performed transformations and the SCoP after the
902 /// transformation.
903 void print(raw_ostream &OS, int Indent = 0) {
904 printStatistics(OS, Indent);
906 if (!Modified) {
907 // This line can easily be checked in regression tests.
908 OS << "ForwardOpTree executed, but did not modify anything\n";
909 return;
912 printStatements(OS, Indent);
916 /// Pass that redirects scalar reads to array elements that are known to contain
917 /// the same value.
919 /// This reduces the number of scalar accesses and therefore potentially
920 /// increases the freedom of the scheduler. In the ideal case, all reads of a
921 /// scalar definition are redirected (We currently do not care about removing
922 /// the write in this case). This is also useful for the main DeLICM pass as
923 /// there are less scalars to be mapped.
924 class ForwardOpTree : public ScopPass {
925 private:
926 /// The pass implementation, also holding per-scop data.
927 std::unique_ptr<ForwardOpTreeImpl> Impl;
929 public:
930 static char ID;
932 explicit ForwardOpTree() : ScopPass(ID) {}
933 ForwardOpTree(const ForwardOpTree &) = delete;
934 ForwardOpTree &operator=(const ForwardOpTree &) = delete;
936 void getAnalysisUsage(AnalysisUsage &AU) const override {
937 AU.addRequiredTransitive<ScopInfoRegionPass>();
938 AU.addRequired<LoopInfoWrapperPass>();
939 AU.setPreservesAll();
942 bool runOnScop(Scop &S) override {
943 // Free resources for previous SCoP's computation, if not yet done.
944 releaseMemory();
946 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
949 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx(), MaxOps, false);
950 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
952 if (AnalyzeKnown) {
953 DEBUG(dbgs() << "Prepare forwarders...\n");
954 Impl->computeKnownValues();
957 DEBUG(dbgs() << "Forwarding operand trees...\n");
958 Impl->forwardOperandTrees();
960 if (MaxOpGuard.hasQuotaExceeded()) {
961 DEBUG(dbgs() << "Not all operations completed because of "
962 "max_operations exceeded\n");
963 KnownOutOfQuota++;
967 DEBUG(dbgs() << "\nFinal Scop:\n");
968 DEBUG(dbgs() << S);
970 // Update statistics
971 auto ScopStats = S.getStatistics();
972 NumValueWrites += ScopStats.NumValueWrites;
973 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
974 NumPHIWrites += ScopStats.NumPHIWrites;
975 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
976 NumSingletonWrites += ScopStats.NumSingletonWrites;
977 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
979 return false;
982 void printScop(raw_ostream &OS, Scop &S) const override {
983 if (!Impl)
984 return;
986 assert(Impl->getScop() == &S);
987 Impl->print(OS);
990 void releaseMemory() override { Impl.reset(); }
991 }; // class ForwardOpTree
993 char ForwardOpTree::ID;
995 } // namespace
997 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
999 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1000 "Polly - Forward operand tree", false, false)
1001 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1002 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1003 "Polly - Forward operand tree", false, false)