[ZoneAlgo] Use getDefToTarget in makeValInst. NFC.
[polly-mirror.git] / lib / Transform / ForwardOpTree.cpp
blob17985ecf11f79e99ed658f15cdbb3990d1463e05
1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Move instructions between statements.
12 //===----------------------------------------------------------------------===//
14 #include "polly/ForwardOpTree.h"
15 #include "polly/Options.h"
16 #include "polly/ScopBuilder.h"
17 #include "polly/ScopInfo.h"
18 #include "polly/ScopPass.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/ISLOStream.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/VirtualInstruction.h"
23 #include "polly/ZoneAlgo.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "isl/ctx.h"
40 #include "isl/isl-noexceptions.h"
41 #include <cassert>
42 #include <memory>
44 #define DEBUG_TYPE "polly-optree"
46 using namespace llvm;
47 using namespace polly;
49 static cl::opt<bool>
50 AnalyzeKnown("polly-optree-analyze-known",
51 cl::desc("Analyze array contents for load forwarding"),
52 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
54 static cl::opt<bool>
55 NormalizePHIs("polly-optree-normalize-phi",
56 cl::desc("Replace PHIs by their incoming values"),
57 cl::cat(PollyCategory), cl::init(false), cl::Hidden);
59 static cl::opt<unsigned>
60 MaxOps("polly-optree-max-ops",
61 cl::desc("Maximum number of ISL operations to invest for known "
62 "analysis; 0=no limit"),
63 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66 STATISTIC(KnownOutOfQuota,
67 "Analyses aborted because max_operations was reached");
69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70 STATISTIC(TotalKnownLoadsForwarded,
71 "Number of forwarded loads because their value was known");
72 STATISTIC(TotalReloads, "Number of reloaded values");
73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75 STATISTIC(TotalModifiedStmts,
76 "Number of statements with at least one forwarded tree");
78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81 STATISTIC(NumValueWritesInLoops,
82 "Number of scalar value writes nested in affine loops after OpTree");
83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84 STATISTIC(NumPHIWritesInLoops,
85 "Number of scalar phi writes nested in affine loops after OpTree");
86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87 STATISTIC(NumSingletonWritesInLoops,
88 "Number of singleton writes nested in affine loops after OpTree");
90 namespace {
92 /// The state of whether an operand tree was/can be forwarded.
93 ///
94 /// The items apply to an instructions and its operand tree with the instruction
95 /// as the root element. If the value in question is not an instruction in the
96 /// SCoP, it can be a leaf of an instruction's operand tree.
97 enum ForwardingDecision {
98 /// The root instruction or value cannot be forwarded at all.
99 FD_CannotForward,
101 /// The root instruction or value can be forwarded as a leaf of a larger
102 /// operand tree.
103 /// It does not make sense to move the value itself, it would just replace it
104 /// by a use of itself. For instance, a constant "5" used in a statement can
105 /// be forwarded, but it would just replace it by the same constant "5".
106 /// However, it makes sense to move as an operand of
108 /// %add = add 5, 5
110 /// where "5" is moved as part of a larger operand tree. "5" would be placed
111 /// (disregarding for a moment that literal constants don't have a location
112 /// and can be used anywhere) into the same statement as %add would.
113 FD_CanForwardLeaf,
115 /// The root instruction can be forwarded and doing so avoids a scalar
116 /// dependency.
118 /// This can be either because the operand tree can be moved to the target
119 /// statement, or a memory access is redirected to read from a different
120 /// location.
121 FD_CanForwardProfitably,
123 /// Used to indicate that a forwarding has be carried out successfully, and
124 /// the forwarded memory access can be deleted.
125 FD_DidForwardTree,
127 /// Used to indicate that a forwarding has be carried out successfully, and
128 /// the forwarded memory access is being reused.
129 FD_DidForwardLeaf,
131 /// A forwarding method cannot be applied to the operand tree.
132 /// The difference to FD_CannotForward is that there might be other methods
133 /// that can handle it.
134 /// The conditions that make an operand tree applicable must be checked even
135 /// with DoIt==true because a method following the one that returned
136 /// FD_NotApplicable might have returned FD_CanForwardTree.
137 FD_NotApplicable
140 /// Implementation of operand tree forwarding for a specific SCoP.
142 /// For a statement that requires a scalar value (through a value read
143 /// MemoryAccess), see if its operand can be moved into the statement. If so,
144 /// the MemoryAccess is removed and the all the operand tree instructions are
145 /// moved into the statement. All original instructions are left in the source
146 /// statements. The simplification pass can clean these up.
147 class ForwardOpTreeImpl : ZoneAlgorithm {
148 private:
149 /// Scope guard to limit the number of isl operations for this pass.
150 IslMaxOperationsGuard &MaxOpGuard;
152 /// How many instructions have been copied to other statements.
153 int NumInstructionsCopied = 0;
155 /// Number of loads forwarded because their value was known.
156 int NumKnownLoadsForwarded = 0;
158 /// Number of values reloaded from known array elements.
159 int NumReloads = 0;
161 /// How many read-only accesses have been copied.
162 int NumReadOnlyCopied = 0;
164 /// How many operand trees have been forwarded.
165 int NumForwardedTrees = 0;
167 /// Number of statements with at least one forwarded operand tree.
168 int NumModifiedStmts = 0;
170 /// Whether we carried out at least one change to the SCoP.
171 bool Modified = false;
173 /// Contains the zones where array elements are known to contain a specific
174 /// value.
175 /// { [Element[] -> Zone[]] -> ValInst[] }
176 /// @see computeKnown()
177 isl::union_map Known;
179 /// Translator for newly introduced ValInsts to already existing ValInsts such
180 /// that new introduced load instructions can reuse the Known analysis of its
181 /// original load. { ValInst[] -> ValInst[] }
182 isl::union_map Translator;
184 /// Get list of array elements that do contain the same ValInst[] at Domain[].
186 /// @param ValInst { Domain[] -> ValInst[] }
187 /// The values for which we search for alternative locations,
188 /// per statement instance.
190 /// @return { Domain[] -> Element[] }
191 /// For each statement instance, the array elements that contain the
192 /// same ValInst.
193 isl::union_map findSameContentElements(isl::union_map ValInst) {
194 assert(!ValInst.is_single_valued().is_false());
196 // { Domain[] }
197 isl::union_set Domain = ValInst.domain();
199 // { Domain[] -> Scatter[] }
200 isl::union_map Schedule = getScatterFor(Domain);
202 // { Element[] -> [Scatter[] -> ValInst[]] }
203 isl::union_map MustKnownCurried =
204 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
206 // { [Domain[] -> ValInst[]] -> Scatter[] }
207 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
209 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
210 isl::union_map SchedValDomVal =
211 DomValSched.range_product(ValInst.range_map()).reverse();
213 // { Element[] -> [Domain[] -> ValInst[]] }
214 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
216 // { Domain[] -> Element[] }
217 isl::union_map MustKnownMap =
218 MustKnownInst.uncurry().domain().unwrap().reverse();
219 simplify(MustKnownMap);
221 return MustKnownMap;
224 /// Find a single array element for each statement instance, within a single
225 /// array.
227 /// @param MustKnown { Domain[] -> Element[] }
228 /// Set of candidate array elements.
229 /// @param Domain { Domain[] }
230 /// The statement instance for which we need elements for.
232 /// @return { Domain[] -> Element[] }
233 /// For each statement instance, an array element out of @p MustKnown.
234 /// All array elements must be in the same array (Polly does not yet
235 /// support reading from different accesses using the same
236 /// MemoryAccess). If no mapping for all of @p Domain exists, returns
237 /// null.
238 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
239 // { Domain[] -> Element[] }
240 isl::map Result;
242 // MemoryAccesses can read only elements from a single array
243 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
244 // Look through all spaces until we find one that contains at least the
245 // wanted statement instance.s
246 MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
247 // Get the array this is accessing.
248 isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
249 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
251 // No support for generation of indirect array accesses.
252 if (SAI->getBasePtrOriginSAI())
253 return isl::stat::ok; // continue
255 // Determine whether this map contains all wanted values.
256 isl::set MapDom = Map.domain();
257 if (!Domain.is_subset(MapDom).is_true())
258 return isl::stat::ok; // continue
260 // There might be multiple array elements that contain the same value, but
261 // choose only one of them. lexmin is used because it returns a one-value
262 // mapping, we do not care about which one.
263 // TODO: Get the simplest access function.
264 Result = Map.lexmin();
265 return isl::stat::error; // break
268 return Result;
271 public:
272 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
273 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
275 /// Compute the zones of known array element contents.
277 /// @return True if the computed #Known is usable.
278 bool computeKnownValues() {
279 isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
281 // Check that nothing strange occurs.
282 collectCompatibleElts();
285 IslQuotaScope QuotaScope = MaxOpGuard.enter();
287 computeCommon();
288 if (NormalizePHIs)
289 computeNormalizedPHIs();
290 Known = computeKnown(true, true);
292 // Preexisting ValInsts use the known content analysis of themselves.
293 Translator = makeIdentityMap(Known.range(), false);
296 if (!Known || !Translator || !NormalizeMap) {
297 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
298 Known = nullptr;
299 Translator = nullptr;
300 NormalizeMap = nullptr;
301 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
302 return false;
305 KnownAnalyzed++;
306 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
308 return true;
311 void printStatistics(raw_ostream &OS, int Indent = 0) {
312 OS.indent(Indent) << "Statistics {\n";
313 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
314 << '\n';
315 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
316 << '\n';
317 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
318 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
319 << '\n';
320 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
321 << '\n';
322 OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
323 << NumModifiedStmts << '\n';
324 OS.indent(Indent) << "}\n";
327 void printStatements(raw_ostream &OS, int Indent = 0) const {
328 OS.indent(Indent) << "After statements {\n";
329 for (auto &Stmt : *S) {
330 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
331 for (auto *MA : Stmt)
332 MA->print(OS);
334 OS.indent(Indent + 12);
335 Stmt.printInstructions(OS);
337 OS.indent(Indent) << "}\n";
340 /// Create a new MemoryAccess of type read and MemoryKind::Array.
342 /// @param Stmt The statement in which the access occurs.
343 /// @param LI The instruction that does the access.
344 /// @param AccessRelation The array element that each statement instance
345 /// accesses.
347 /// @param The newly created access.
348 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
349 isl::map AccessRelation) {
350 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
351 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
353 // Create a dummy SCEV access, to be replaced anyway.
354 SmallVector<const SCEV *, 4> Sizes;
355 Sizes.reserve(SAI->getNumberOfDimensions());
356 SmallVector<const SCEV *, 4> Subscripts;
357 Subscripts.reserve(SAI->getNumberOfDimensions());
358 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
359 Sizes.push_back(SAI->getDimensionSize(i));
360 Subscripts.push_back(nullptr);
363 MemoryAccess *Access =
364 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
365 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
366 S->addAccessFunction(Access);
367 Stmt->addAccess(Access, true);
369 Access->setNewAccessRelation(AccessRelation);
371 return Access;
374 /// Forward a load by reading from an array element that contains the same
375 /// value. Typically the location it was loaded from.
377 /// @param TargetStmt The statement the operand tree will be copied to.
378 /// @param Inst The (possibly speculatable) instruction to forward.
379 /// @param UseStmt The statement that uses @p Inst.
380 /// @param UseLoop The loop @p Inst is used in.
381 /// @param DefStmt The statement @p Inst is defined in.
382 /// @param DefLoop The loop which contains @p Inst.
383 /// @param DoIt If false, only determine whether an operand tree can be
384 /// forwarded. If true, carry out the forwarding. Do not
385 /// use DoIt==true if an operand tree is not known to be
386 /// forwardable.
388 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new
389 /// load.
390 /// FD_CannotForward if the pointer operand cannot be forwarded.
391 /// FD_CanForwardProfitably if @p Inst is forwardable.
392 /// FD_DidForwardTree if @p DoIt was true.
393 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
394 ScopStmt *UseStmt, Loop *UseLoop,
395 ScopStmt *DefStmt, Loop *DefLoop,
396 bool DoIt) {
397 // Cannot do anything without successful known analysis.
398 if (Known.is_null() || Translator.is_null() ||
399 MaxOpGuard.hasQuotaExceeded())
400 return FD_NotApplicable;
402 LoadInst *LI = dyn_cast<LoadInst>(Inst);
403 if (!LI)
404 return FD_NotApplicable;
406 // If the load is already in the statement, no forwarding is necessary.
407 // However, it might happen that the LoadInst is already present in the
408 // statement's instruction list. In that case we do as follows:
409 // - For the evaluation (DoIt==false), we can trivially forward it as it is
410 // benefit of forwarding an already present instruction.
411 // - For the execution (DoIt==true), prepend the instruction (to make it
412 // available to all instructions following in the instruction list), but
413 // do not add another MemoryAccess.
414 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
415 if (Access && !DoIt)
416 return FD_CanForwardProfitably;
418 ForwardingDecision OpDecision = forwardTree(
419 TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
420 switch (OpDecision) {
421 case FD_CannotForward:
422 assert(!DoIt);
423 return OpDecision;
425 case FD_CanForwardLeaf:
426 case FD_CanForwardProfitably:
427 assert(!DoIt);
428 break;
430 case FD_DidForwardLeaf:
431 case FD_DidForwardTree:
432 assert(DoIt);
433 break;
435 default:
436 llvm_unreachable("Shouldn't return this");
439 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
441 // { DomainDef[] -> ValInst[] }
442 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
443 assert(!isNormalized(ExpectedVal).is_false() &&
444 "LoadInsts are always normalized");
446 // { DomainUse[] -> DomainTarget[] }
447 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
449 // { DomainTarget[] -> ValInst[] }
450 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
451 isl::union_map TranslatedExpectedVal =
452 isl::union_map(TargetExpectedVal).apply_range(Translator);
454 // { DomainTarget[] -> Element[] }
455 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
457 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
458 if (!SameVal)
459 return FD_NotApplicable;
461 if (DoIt)
462 TargetStmt->prependInstruction(LI);
464 if (!DoIt)
465 return FD_CanForwardProfitably;
467 if (Access) {
468 LLVM_DEBUG(
469 dbgs() << " forwarded known load with preexisting MemoryAccess"
470 << Access << "\n");
471 } else {
472 Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
473 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess"
474 << Access << "\n");
476 // { ValInst[] }
477 isl::space ValInstSpace = ExpectedVal.get_space().range();
479 // After adding a new load to the SCoP, also update the Known content
480 // about it. The new load will have a known ValInst of
481 // { [DomainTarget[] -> Value[]] }
482 // but which -- because it is a copy of it -- has same value as the
483 // { [DomainDef[] -> Value[]] }
484 // that it replicates. Instead of cloning the known content of
485 // [DomainDef[] -> Value[]]
486 // for DomainTarget[], we add a 'translator' that maps
487 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
488 // before comparing to the known content.
489 // TODO: 'Translator' could also be used to map PHINodes to their incoming
490 // ValInsts.
491 if (ValInstSpace.is_wrapping()) {
492 // { DefDomain[] -> Value[] }
493 isl::map ValInsts = ExpectedVal.range().unwrap();
495 // { DefDomain[] }
496 isl::set DefDomain = ValInsts.domain();
498 // { Value[] }
499 isl::space ValSpace = ValInstSpace.unwrap().range();
501 // { Value[] -> Value[] }
502 isl::map ValToVal =
503 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
505 // { DomainDef[] -> DomainTarget[] }
506 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
508 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
509 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
511 Translator = Translator.add_map(LocalTranslator);
512 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator
513 << "\n");
516 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal
517 << "\n");
518 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates
519 << "\n");
520 assert(Access);
522 NumKnownLoadsForwarded++;
523 TotalKnownLoadsForwarded++;
524 return FD_DidForwardTree;
527 /// Forward a scalar by redirecting the access to an array element that stores
528 /// the same value.
530 /// @param TargetStmt The statement the operand tree will be copied to.
531 /// @param Inst The scalar to forward.
532 /// @param UseStmt The statement that uses @p Inst.
533 /// @param UseLoop The loop @p Inst is used in.
534 /// @param DefStmt The statement @p Inst is defined in.
535 /// @param DefLoop The loop which contains @p Inst.
536 /// @param DoIt If false, only determine whether an operand tree can be
537 /// forwarded. If true, carry out the forwarding. Do not
538 /// use DoIt==true if an operand tree is not known to be
539 /// forwardable.
541 /// @return FD_NotApplicable if @p Inst cannot be reloaded.
542 /// FD_CanForwardLeaf if @p Inst can be reloaded.
543 /// FD_CanForwardProfitably if @p Inst has been reloaded.
544 /// FD_DidForwardLeaf if @p DoIt was true.
545 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
546 ScopStmt *UseStmt, Loop *UseLoop,
547 ScopStmt *DefStmt, Loop *DefLoop,
548 bool DoIt) {
549 // Cannot do anything without successful known analysis.
550 if (Known.is_null() || Translator.is_null() ||
551 MaxOpGuard.hasQuotaExceeded())
552 return FD_NotApplicable;
554 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
555 if (Access && Access->isLatestArrayKind()) {
556 if (DoIt)
557 return FD_DidForwardLeaf;
558 return FD_CanForwardLeaf;
561 // Don't spend too much time analyzing whether it can be reloaded. When
562 // carrying-out the forwarding, we cannot bail-out in the middle of the
563 // transformation. It also shouldn't take as long because some results are
564 // cached.
565 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
567 // { DomainDef[] -> ValInst[] }
568 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
570 // { DomainUse[] -> DomainTarget[] }
571 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
573 // { DomainTarget[] -> ValInst[] }
574 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
575 isl::union_map TranslatedExpectedVal =
576 TargetExpectedVal.apply_range(Translator);
578 // { DomainTarget[] -> Element[] }
579 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
581 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
582 if (!SameVal)
583 return FD_NotApplicable;
585 if (!DoIt)
586 return FD_CanForwardProfitably;
588 if (!Access)
589 Access = TargetStmt->ensureValueRead(Inst);
591 simplify(SameVal);
592 Access->setNewAccessRelation(SameVal);
594 TotalReloads++;
595 NumReloads++;
596 return FD_DidForwardLeaf;
599 /// Forwards a speculatively executable instruction.
601 /// @param TargetStmt The statement the operand tree will be copied to.
602 /// @param UseInst The (possibly speculatable) instruction to forward.
603 /// @param DefStmt The statement @p UseInst is defined in.
604 /// @param DefLoop The loop which contains @p UseInst.
605 /// @param DoIt If false, only determine whether an operand tree can be
606 /// forwarded. If true, carry out the forwarding. Do not
607 /// use DoIt==true if an operand tree is not known to be
608 /// forwardable.
610 /// @return FD_NotApplicable if @p UseInst is not speculatable.
611 /// FD_CannotForward if one of @p UseInst's operands is not
612 /// forwardable.
613 /// FD_CanForwardTree if @p UseInst is forwardable.
614 /// FD_DidForward if @p DoIt was true.
615 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
616 Instruction *UseInst,
617 ScopStmt *DefStmt, Loop *DefLoop,
618 bool DoIt) {
619 // PHIs, unless synthesizable, are not yet supported.
620 if (isa<PHINode>(UseInst))
621 return FD_NotApplicable;
623 // Compatible instructions must satisfy the following conditions:
624 // 1. Idempotent (instruction will be copied, not moved; although its
625 // original instance might be removed by simplification)
626 // 2. Not access memory (There might be memory writes between)
627 // 3. Not cause undefined behaviour (we might copy to a location when the
628 // original instruction was no executed; this is currently not possible
629 // because we do not forward PHINodes)
630 // 4. Not leak memory if executed multiple times (i.e. malloc)
632 // Instruction::mayHaveSideEffects is not sufficient because it considers
633 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
634 // not sufficient because it allows memory accesses.
635 if (mayBeMemoryDependent(*UseInst))
636 return FD_NotApplicable;
638 if (DoIt) {
639 // To ensure the right order, prepend this instruction before its
640 // operands. This ensures that its operands are inserted before the
641 // instruction using them.
642 // TODO: The operand tree is not really a tree, but a DAG. We should be
643 // able to handle DAGs without duplication.
644 TargetStmt->prependInstruction(UseInst);
645 NumInstructionsCopied++;
646 TotalInstructionsCopied++;
649 for (Value *OpVal : UseInst->operand_values()) {
650 ForwardingDecision OpDecision =
651 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
652 switch (OpDecision) {
653 case FD_CannotForward:
654 assert(!DoIt);
655 return FD_CannotForward;
657 case FD_CanForwardLeaf:
658 case FD_CanForwardProfitably:
659 assert(!DoIt);
660 break;
662 case FD_DidForwardLeaf:
663 case FD_DidForwardTree:
664 assert(DoIt);
665 break;
667 case FD_NotApplicable:
668 llvm_unreachable("forwardTree should never return FD_NotApplicable");
672 if (DoIt)
673 return FD_DidForwardTree;
674 return FD_CanForwardProfitably;
677 /// Determines whether an operand tree can be forwarded or carries out a
678 /// forwarding, depending on the @p DoIt flag.
680 /// @param TargetStmt The statement the operand tree will be copied to.
681 /// @param UseVal The value (usually an instruction) which is root of an
682 /// operand tree.
683 /// @param UseStmt The statement that uses @p UseVal.
684 /// @param UseLoop The loop @p UseVal is used in.
685 /// @param DoIt If false, only determine whether an operand tree can be
686 /// forwarded. If true, carry out the forwarding. Do not
687 /// use DoIt==true if an operand tree is not known to be
688 /// forwardable.
690 /// @return If DoIt==false, return whether the operand tree can be forwarded.
691 /// If DoIt==true, return FD_DidForward.
692 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
693 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
694 ScopStmt *DefStmt = nullptr;
695 Loop *DefLoop = nullptr;
697 // { DefDomain[] -> TargetDomain[] }
698 isl::map DefToTarget;
700 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
701 switch (VUse.getKind()) {
702 case VirtualUse::Constant:
703 case VirtualUse::Block:
704 case VirtualUse::Hoisted:
705 // These can be used anywhere without special considerations.
706 if (DoIt)
707 return FD_DidForwardTree;
708 return FD_CanForwardLeaf;
710 case VirtualUse::Synthesizable: {
711 // ScopExpander will take care for of generating the code at the new
712 // location.
713 if (DoIt)
714 return FD_DidForwardTree;
716 // Check if the value is synthesizable at the new location as well. This
717 // might be possible when leaving a loop for which ScalarEvolution is
718 // unable to derive the exit value for.
719 // TODO: If there is a LCSSA PHI at the loop exit, use that one.
720 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
721 // do not forward past its loop header. This would require us to use a
722 // previous loop induction variable instead the current one. We currently
723 // do not allow forwarding PHI nodes, thus this should never occur (the
724 // only exception where no phi is necessary being an unreachable loop
725 // without edge from the outside).
726 VirtualUse TargetUse = VirtualUse::create(
727 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
728 if (TargetUse.getKind() == VirtualUse::Synthesizable)
729 return FD_CanForwardLeaf;
731 LLVM_DEBUG(
732 dbgs() << " Synthesizable would not be synthesizable anymore: "
733 << *UseVal << "\n");
734 return FD_CannotForward;
737 case VirtualUse::ReadOnly:
738 // Note that we cannot return FD_CanForwardTree here. With a operand tree
739 // depth of 0, UseVal is the use in TargetStmt that we try to replace.
740 // With -polly-analyze-read-only-scalars=true we would ensure the
741 // existence of a MemoryAccess (which already exists for a leaf) and be
742 // removed again by tryForwardTree because it's goal is to remove this
743 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
744 // to do so.
745 if (!DoIt)
746 return FD_CanForwardLeaf;
748 // If we model read-only scalars, we need to create a MemoryAccess for it.
749 if (ModelReadOnlyScalars)
750 TargetStmt->ensureValueRead(UseVal);
752 NumReadOnlyCopied++;
753 TotalReadOnlyCopied++;
754 return FD_DidForwardLeaf;
756 case VirtualUse::Intra:
757 // Knowing that UseStmt and DefStmt are the same statement instance, just
758 // reuse the information about UseStmt for DefStmt
759 DefStmt = UseStmt;
761 LLVM_FALLTHROUGH;
762 case VirtualUse::Inter:
763 Instruction *Inst = cast<Instruction>(UseVal);
765 if (!DefStmt) {
766 DefStmt = S->getStmtFor(Inst);
767 if (!DefStmt)
768 return FD_CannotForward;
771 DefLoop = LI->getLoopFor(Inst->getParent());
773 ForwardingDecision SpeculativeResult =
774 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
775 if (SpeculativeResult != FD_NotApplicable)
776 return SpeculativeResult;
778 ForwardingDecision KnownResult = forwardKnownLoad(
779 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
780 if (KnownResult != FD_NotApplicable)
781 return KnownResult;
783 ForwardingDecision ReloadResult = reloadKnownContent(
784 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
785 if (ReloadResult != FD_NotApplicable)
786 return ReloadResult;
788 // When no method is found to forward the operand tree, we effectively
789 // cannot handle it.
790 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
791 return FD_CannotForward;
794 llvm_unreachable("Case unhandled");
797 /// Try to forward an operand tree rooted in @p RA.
798 bool tryForwardTree(MemoryAccess *RA) {
799 assert(RA->isLatestScalarKind());
800 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
802 ScopStmt *Stmt = RA->getStatement();
803 Loop *InLoop = Stmt->getSurroundingLoop();
805 isl::map TargetToUse;
806 if (!Known.is_null()) {
807 isl::space DomSpace = Stmt->getDomainSpace();
808 TargetToUse =
809 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
812 ForwardingDecision Assessment =
813 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
814 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
815 if (Assessment != FD_CanForwardProfitably)
816 return false;
818 ForwardingDecision Execution =
819 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
820 assert(((Execution == FD_DidForwardTree) ||
821 (Execution == FD_DidForwardLeaf)) &&
822 "A previous positive assessment must also be executable");
824 if (Execution == FD_DidForwardTree)
825 Stmt->removeSingleMemoryAccess(RA);
826 return true;
829 /// Return which SCoP this instance is processing.
830 Scop *getScop() const { return S; }
832 /// Run the algorithm: Use value read accesses as operand tree roots and try
833 /// to forward them into the statement.
834 bool forwardOperandTrees() {
835 for (ScopStmt &Stmt : *S) {
836 bool StmtModified = false;
838 // Because we are modifying the MemoryAccess list, collect them first to
839 // avoid iterator invalidation.
840 SmallVector<MemoryAccess *, 16> Accs;
841 for (MemoryAccess *RA : Stmt) {
842 if (!RA->isRead())
843 continue;
844 if (!RA->isLatestScalarKind())
845 continue;
847 Accs.push_back(RA);
850 for (MemoryAccess *RA : Accs) {
851 if (tryForwardTree(RA)) {
852 Modified = true;
853 StmtModified = true;
854 NumForwardedTrees++;
855 TotalForwardedTrees++;
859 if (StmtModified) {
860 NumModifiedStmts++;
861 TotalModifiedStmts++;
865 if (Modified)
866 ScopsModified++;
867 return Modified;
870 /// Print the pass result, performed transformations and the SCoP after the
871 /// transformation.
872 void print(raw_ostream &OS, int Indent = 0) {
873 printStatistics(OS, Indent);
875 if (!Modified) {
876 // This line can easily be checked in regression tests.
877 OS << "ForwardOpTree executed, but did not modify anything\n";
878 return;
881 printStatements(OS, Indent);
885 /// Pass that redirects scalar reads to array elements that are known to contain
886 /// the same value.
888 /// This reduces the number of scalar accesses and therefore potentially
889 /// increases the freedom of the scheduler. In the ideal case, all reads of a
890 /// scalar definition are redirected (We currently do not care about removing
891 /// the write in this case). This is also useful for the main DeLICM pass as
892 /// there are less scalars to be mapped.
893 class ForwardOpTree : public ScopPass {
894 private:
895 /// The pass implementation, also holding per-scop data.
896 std::unique_ptr<ForwardOpTreeImpl> Impl;
898 public:
899 static char ID;
901 explicit ForwardOpTree() : ScopPass(ID) {}
902 ForwardOpTree(const ForwardOpTree &) = delete;
903 ForwardOpTree &operator=(const ForwardOpTree &) = delete;
905 void getAnalysisUsage(AnalysisUsage &AU) const override {
906 AU.addRequiredTransitive<ScopInfoRegionPass>();
907 AU.addRequired<LoopInfoWrapperPass>();
908 AU.setPreservesAll();
911 bool runOnScop(Scop &S) override {
912 // Free resources for previous SCoP's computation, if not yet done.
913 releaseMemory();
915 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
918 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
919 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
921 if (AnalyzeKnown) {
922 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
923 Impl->computeKnownValues();
926 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
927 Impl->forwardOperandTrees();
929 if (MaxOpGuard.hasQuotaExceeded()) {
930 LLVM_DEBUG(dbgs() << "Not all operations completed because of "
931 "max_operations exceeded\n");
932 KnownOutOfQuota++;
936 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
937 LLVM_DEBUG(dbgs() << S);
939 // Update statistics
940 auto ScopStats = S.getStatistics();
941 NumValueWrites += ScopStats.NumValueWrites;
942 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
943 NumPHIWrites += ScopStats.NumPHIWrites;
944 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
945 NumSingletonWrites += ScopStats.NumSingletonWrites;
946 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
948 return false;
951 void printScop(raw_ostream &OS, Scop &S) const override {
952 if (!Impl)
953 return;
955 assert(Impl->getScop() == &S);
956 Impl->print(OS);
959 void releaseMemory() override { Impl.reset(); }
960 }; // class ForwardOpTree
962 char ForwardOpTree::ID;
963 } // namespace
965 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
967 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
968 "Polly - Forward operand tree", false, false)
969 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
970 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
971 "Polly - Forward operand tree", false, false)