1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Undo the effect of Loop Invariant Code Motion (LICM) and
11 // GVN Partial Redundancy Elimination (PRE) on SCoP-level.
13 // Namely, remove register/scalar dependencies by mapping them back to array
16 // The algorithms here work on the scatter space - the image space of the
17 // schedule returned by Scop::getSchedule(). We call an element in that space a
18 // "timepoint". Timepoints are lexicographically ordered such that we can
19 // define ranges in the scatter space. We use two flavors of such ranges:
20 // Timepoint sets and zones. A timepoint set is simply a subset of the scatter
21 // space and is directly stored as isl_set.
23 // Zones are used to describe the space between timepoints as open sets, i.e.
24 // they do not contain the extrema. Using isl rational sets to express these
25 // would be overkill. We also cannot store them as the integer timepoints they
26 // contain; the (nonempty) zone between 1 and 2 would be empty and
27 // indistinguishable from e.g. the zone between 3 and 4. Also, we cannot store
28 // the integer set including the extrema; the set ]1,2[ + ]3,4[ could be
29 // coalesced to ]1,3[, although we defined the range [2,3] to be not in the set.
30 // Instead, we store the "half-open" integer extrema, including the lower bound,
31 // but excluding the upper bound. Examples:
33 // * The set { [i] : 1 <= i <= 3 } represents the zone ]0,3[ (which contains the
34 // integer points 1 and 2, but not 0 or 3)
36 // * { [1] } represents the zone ]0,1[
38 // * { [i] : i = 1 or i = 3 } represents the zone ]0,1[ + ]2,3[
40 // Therefore, an integer i in the set represents the zone ]i-1,i[, i.e. strictly
41 // speaking the integer points never belong to the zone. However, depending on
42 // the interpretation, one might want to include them. Part of the
43 // interpretation may not be known when the zone is constructed.
45 // Reads are assumed to always take place before writes, hence we can think of
46 // reads taking place at the beginning of a timepoint and writes at the end.
48 // Let's assume that the zone represents the lifetime of a variable. That is,
49 // the zone begins with a write that defines the value during its lifetime and
50 // ends with the last read of that value. In the following we consider whether a
51 // read/write at the beginning/ending of the lifetime zone should be within the
52 // zone or outside of it.
54 // * A read at the timepoint that starts the live-range loads the previous
55 // value. Hence, exclude the timepoint starting the zone.
57 // * A write at the timepoint that starts the live-range is not defined whether
58 // it occurs before or after the write that starts the lifetime. We do not
59 // allow this situation to occur. Hence, we include the timepoint starting the
60 // zone to determine whether they are conflicting.
62 // * A read at the timepoint that ends the live-range reads the same variable.
63 // We include the timepoint at the end of the zone to include that read into
64 // the live-range. Doing otherwise would mean that the two reads access
65 // different values, which would mean that the value they read are both alive
66 // at the same time but occupy the same variable.
68 // * A write at the timepoint that ends the live-range starts a new live-range.
69 // It must not be included in the live-range of the previous definition.
71 // All combinations of reads and writes at the endpoints are possible, but most
72 // of the time only the write->read (for instance, a live-range from definition
73 // to last use) and read->write (for instance, an unused range from last use to
74 // overwrite) and combinations are interesting (half-open ranges). write->write
75 // zones might be useful as well in some context to represent
76 // output-dependencies.
78 // @see convertZoneToTimepoints
81 // The code makes use of maps and sets in many different spaces. To not lose
82 // track in which space a set or map is expected to be in, variables holding an
83 // isl reference are usually annotated in the comments. They roughly follow isl
84 // syntax for spaces, but only the tuples, not the dimensions. The tuples have a
85 // meaning as follows:
87 // * Space[] - An unspecified tuple. Used for function parameters such that the
88 // function caller can use it for anything they like.
90 // * Domain[] - A statement instance as returned by ScopStmt::getDomain()
91 // isl_id_get_name: Stmt_<NameOfBasicBlock>
92 // isl_id_get_user: Pointer to ScopStmt
94 // * Element[] - An array element as in the range part of
95 // MemoryAccess::getAccessRelation()
96 // isl_id_get_name: MemRef_<NameOfArrayVariable>
97 // isl_id_get_user: Pointer to ScopArrayInfo
99 // * Scatter[] - Scatter space or space of timepoints
102 // * Zone[] - Range between timepoints as described above
105 // An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a
106 // statement instance to a timepoint, aka a schedule. There is only one scatter
107 // space, but most of the time multiple statements are processed in one set.
108 // This is why most of the time isl_union_map has to be used.
110 // The basic algorithm works as follows:
111 // At first we verify that the SCoP is compatible with this technique. For
112 // instance, two writes cannot write to the same location at the same statement
113 // instance because we cannot determine within the polyhedral model which one
114 // comes first. Once this was verified, we compute zones at which an array
115 // element is unused. This computation can fail if it takes too long. Then the
116 // main algorithm is executed. Because every store potentially trails an unused
117 // zone, we start at stores. We search for a scalar (MemoryKind::Value or
118 // MemoryKind::PHI) that we can map to the array element overwritten by the
119 // store, preferably one that is used by the store or at least the ScopStmt.
120 // When it does not conflict with the lifetime of the values in the array
121 // element, the map is applied and the unused zone updated as it is now used. We
122 // continue to try to map scalars to the array element until there are no more
123 // candidates to map. The algorithm is greedy in the sense that the first scalar
124 // not conflicting will be mapped. Other scalars processed later that could have
125 // fit the same unused zone will be rejected. As such the result depends on the
128 //===----------------------------------------------------------------------===//
130 #include "polly/DeLICM.h"
131 #include "polly/Options.h"
132 #include "polly/ScopInfo.h"
133 #include "polly/ScopPass.h"
134 #include "polly/Support/ISLTools.h"
135 #include "llvm/ADT/Statistic.h"
136 #define DEBUG_TYPE "polly-delicm"
138 using namespace polly
;
139 using namespace llvm
;
144 DelicmMaxOps("polly-delicm-max-ops",
145 cl::desc("Maximum number of isl operations to invest for "
146 "lifetime analysis; 0=no limit"),
147 cl::init(1000000), cl::cat(PollyCategory
));
149 STATISTIC(DeLICMAnalyzed
, "Number of successfully analyzed SCoPs");
150 STATISTIC(DeLICMOutOfQuota
,
151 "Analyses aborted because max_operations was reached");
152 STATISTIC(DeLICMIncompatible
, "Number of SCoPs incompatible for analysis");
153 STATISTIC(MappedValueScalars
, "Number of mapped Value scalars");
154 STATISTIC(MappedPHIScalars
, "Number of mapped PHI scalars");
155 STATISTIC(TargetsMapped
, "Number of stores used for at least one mapping");
156 STATISTIC(DeLICMScopsModified
, "Number of SCoPs optimized");
158 /// Class for keeping track of scalar def-use chains in the polyhedral
161 /// MemoryKind::Value:
162 /// There is one definition per llvm::Value or zero (read-only values defined
163 /// before the SCoP) and an arbitrary number of reads.
165 /// MemoryKind::PHI, MemoryKind::ExitPHI:
166 /// There is at least one write (the incoming blocks/stmts) and one
167 /// (MemoryKind::PHI) or zero (MemoryKind::ExitPHI) reads per llvm::PHINode.
168 class ScalarDefUseChains
{
170 /// The definitions (i.e. write MemoryAccess) of a MemoryKind::Value scalar.
171 DenseMap
<const ScopArrayInfo
*, MemoryAccess
*> ValueDefAccs
;
173 /// List of all uses (i.e. read MemoryAccesses) for a MemoryKind::Value
175 DenseMap
<const ScopArrayInfo
*, SmallVector
<MemoryAccess
*, 4>> ValueUseAccs
;
177 /// The receiving part (i.e. read MemoryAccess) of a MemoryKind::PHI scalar.
178 DenseMap
<const ScopArrayInfo
*, MemoryAccess
*> PHIReadAccs
;
180 /// List of all incoming values (write MemoryAccess) of a MemoryKind::PHI or
181 /// MemoryKind::ExitPHI scalar.
182 DenseMap
<const ScopArrayInfo
*, SmallVector
<MemoryAccess
*, 4>>
186 /// Find the MemoryAccesses that access the ScopArrayInfo-represented memory.
188 /// @param S The SCoP to analyze.
189 void compute(Scop
*S
) {
190 // Purge any previous result.
193 for (auto &Stmt
: *S
) {
194 for (auto *MA
: Stmt
) {
195 if (MA
->isOriginalValueKind() && MA
->isWrite()) {
196 auto *SAI
= MA
->getScopArrayInfo();
197 assert(!ValueDefAccs
.count(SAI
) &&
198 "There can be at most one "
199 "definition per MemoryKind::Value scalar");
200 ValueDefAccs
[SAI
] = MA
;
203 if (MA
->isOriginalValueKind() && MA
->isRead())
204 ValueUseAccs
[MA
->getScopArrayInfo()].push_back(MA
);
206 if (MA
->isOriginalAnyPHIKind() && MA
->isRead()) {
207 auto *SAI
= MA
->getScopArrayInfo();
208 assert(!PHIReadAccs
.count(SAI
) &&
209 "There must be exactly one read "
210 "per PHI (that's where the PHINode is)");
211 PHIReadAccs
[SAI
] = MA
;
214 if (MA
->isOriginalAnyPHIKind() && MA
->isWrite())
215 PHIIncomingAccs
[MA
->getScopArrayInfo()].push_back(MA
);
220 /// Free all memory used by the analysis.
222 ValueDefAccs
.clear();
223 ValueUseAccs
.clear();
225 PHIIncomingAccs
.clear();
228 MemoryAccess
*getValueDef(const ScopArrayInfo
*SAI
) const {
229 return ValueDefAccs
.lookup(SAI
);
232 ArrayRef
<MemoryAccess
*> getValueUses(const ScopArrayInfo
*SAI
) const {
233 auto It
= ValueUseAccs
.find(SAI
);
234 if (It
== ValueUseAccs
.end())
239 MemoryAccess
*getPHIRead(const ScopArrayInfo
*SAI
) const {
240 return PHIReadAccs
.lookup(SAI
);
243 ArrayRef
<MemoryAccess
*> getPHIIncomings(const ScopArrayInfo
*SAI
) const {
244 auto It
= PHIIncomingAccs
.find(SAI
);
245 if (It
== PHIIncomingAccs
.end())
251 IslPtr
<isl_union_map
> computeReachingDefinition(IslPtr
<isl_union_map
> Schedule
,
252 IslPtr
<isl_union_map
> Writes
,
253 bool InclDef
, bool InclRedef
) {
254 return computeReachingWrite(Schedule
, Writes
, false, InclDef
, InclRedef
);
257 IslPtr
<isl_union_map
> computeReachingOverwrite(IslPtr
<isl_union_map
> Schedule
,
258 IslPtr
<isl_union_map
> Writes
,
260 bool InclOverwrite
) {
261 return computeReachingWrite(Schedule
, Writes
, true, InclPrevWrite
,
265 /// Compute the next overwrite for a scalar.
267 /// @param Schedule { DomainWrite[] -> Scatter[] }
268 /// Schedule of (at least) all writes. Instances not in @p
269 /// Writes are ignored.
270 /// @param Writes { DomainWrite[] }
271 /// The element instances that write to the scalar.
272 /// @param InclPrevWrite Whether to extend the timepoints to include
273 /// the timepoint where the previous write happens.
274 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint
275 /// of the overwrite itself.
277 /// @return { Scatter[] -> DomainDef[] }
278 IslPtr
<isl_union_map
>
279 computeScalarReachingOverwrite(IslPtr
<isl_union_map
> Schedule
,
280 IslPtr
<isl_union_set
> Writes
, bool InclPrevWrite
,
281 bool InclOverwrite
) {
284 auto WritesMap
= give(isl_union_map_from_domain(Writes
.take()));
286 // { [Element[] -> Scatter[]] -> DomainWrite[] }
287 auto Result
= computeReachingOverwrite(
288 std::move(Schedule
), std::move(WritesMap
), InclPrevWrite
, InclOverwrite
);
290 return give(isl_union_map_domain_factor_range(Result
.take()));
293 /// Overload of computeScalarReachingOverwrite, with only one writing statement.
294 /// Consequently, the result consists of only one map space.
296 /// @param Schedule { DomainWrite[] -> Scatter[] }
297 /// @param Writes { DomainWrite[] }
298 /// @param InclPrevWrite Include the previous write to result.
299 /// @param InclOverwrite Include the overwrite to the result.
301 /// @return { Scatter[] -> DomainWrite[] }
302 IslPtr
<isl_map
> computeScalarReachingOverwrite(IslPtr
<isl_union_map
> Schedule
,
303 IslPtr
<isl_set
> Writes
,
305 bool InclOverwrite
) {
306 auto ScatterSpace
= getScatterSpace(Schedule
);
307 auto DomSpace
= give(isl_set_get_space(Writes
.keep()));
309 auto ReachOverwrite
= computeScalarReachingOverwrite(
310 Schedule
, give(isl_union_set_from_set(Writes
.take())), InclPrevWrite
,
313 auto ResultSpace
= give(isl_space_map_from_domain_and_range(
314 ScatterSpace
.take(), DomSpace
.take()));
315 return singleton(std::move(ReachOverwrite
), ResultSpace
);
318 /// Compute the reaching definition of a scalar.
320 /// Compared to computeReachingDefinition, there is just one element which is
321 // accessed and therefore only a set of instances that accesses that element is
324 /// @param Schedule { DomainWrite[] -> Scatter[] }
325 /// @param Writes { DomainWrite[] }
326 /// @param InclDef Include the timepoint of the definition to the result.
327 /// @param InclRedef Include the timepoint of the overwrite into the result.
329 /// @return { Scatter[] -> DomainWrite[] }
330 IslPtr
<isl_union_map
>
331 computeScalarReachingDefinition(IslPtr
<isl_union_map
> Schedule
,
332 IslPtr
<isl_union_set
> Writes
, bool InclDef
,
335 // { DomainWrite[] -> Element[] }
336 auto Defs
= give(isl_union_map_from_domain(Writes
.take()));
338 // { [Element[] -> Scatter[]] -> DomainWrite[] }
340 computeReachingDefinition(Schedule
, Defs
, InclDef
, InclRedef
);
342 // { Scatter[] -> DomainWrite[] }
343 return give(isl_union_set_unwrap(
344 isl_union_map_range(isl_union_map_curry(ReachDefs
.take()))));
347 /// Compute the reaching definition of a scalar.
349 /// This overload accepts only a single writing statement as an isl_map,
350 /// consequently the result also is only a single isl_map.
352 /// @param Schedule { DomainWrite[] -> Scatter[] }
353 /// @param Writes { DomainWrite[] }
354 /// @param InclDef Include the timepoint of the definition to the result.
355 /// @param InclRedef Include the timepoint of the overwrite into the result.
357 /// @return { Scatter[] -> DomainWrite[] }
358 IslPtr
<isl_map
> computeScalarReachingDefinition( // { Domain[] -> Zone[] }
359 IslPtr
<isl_union_map
> Schedule
, IslPtr
<isl_set
> Writes
, bool InclDef
,
361 auto DomainSpace
= give(isl_set_get_space(Writes
.keep()));
362 auto ScatterSpace
= getScatterSpace(Schedule
);
364 // { Scatter[] -> DomainWrite[] }
365 auto UMap
= computeScalarReachingDefinition(
366 Schedule
, give(isl_union_set_from_set(Writes
.take())), InclDef
,
369 auto ResultSpace
= give(isl_space_map_from_domain_and_range(
370 ScatterSpace
.take(), DomainSpace
.take()));
371 return singleton(UMap
, ResultSpace
);
374 /// If InputVal is not defined in the stmt itself, return the MemoryAccess that
375 /// reads the scalar. Return nullptr otherwise (if the value is defined in the
376 /// scop, or is synthesizable).
377 MemoryAccess
*getInputAccessOf(Value
*InputVal
, ScopStmt
*Stmt
) {
378 for (auto *MA
: *Stmt
) {
381 if (!MA
->isLatestScalarKind())
384 assert(MA
->getAccessValue() == MA
->getBaseAddr());
385 if (MA
->getAccessValue() == InputVal
)
391 /// Represent the knowledge of the contents of any array elements in any zone or
392 /// the knowledge we would add when mapping a scalar to an array element.
394 /// Every array element at every zone unit has one of two states:
396 /// - Unused: Not occupied by any value so a transformation can change it to
399 /// - Occupied: The element contains a value that is still needed.
401 /// The union of Unused and Unknown zones forms the universe, the set of all
402 /// elements at every timepoint. The universe can easily be derived from the
403 /// array elements that are accessed someway. Arrays that are never accessed
404 /// also never play a role in any computation and can hence be ignored. With a
405 /// given universe, only one of the sets needs to stored implicitly. Computing
406 /// the complement is also an expensive operation, hence this class has been
407 /// designed that only one of sets is needed while the other is assumed to be
408 /// implicit. It can still be given, but is mostly ignored.
410 /// There are two use cases for the Knowledge class:
412 /// 1) To represent the knowledge of the current state of ScopInfo. The unused
413 /// state means that an element is currently unused: there is no read of it
414 /// before the next overwrite. Also called 'Existing'.
416 /// 2) To represent the requirements for mapping a scalar to array elements. The
417 /// unused state means that there is no change/requirement. Also called
420 /// In addition to these states at unit zones, Knowledge needs to know when
421 /// values are written. This is because written values may have no lifetime (one
422 /// reason is that the value is never read). Such writes would therefore never
423 /// conflict, but overwrite values that might still be required. Another source
424 /// of problems are multiple writes to the same element at the same timepoint,
425 /// because their order is undefined.
428 /// { [Element[] -> Zone[]] }
429 /// Set of array elements and when they are alive.
430 /// Can contain a nullptr; in this case the set is implicitly defined as the
431 /// complement of #Unused.
433 /// The set of alive array elements is represented as zone, as the set of live
434 /// values can differ depending on how the elements are interpreted.
435 /// Assuming a value X is written at timestep [0] and read at timestep [1]
436 /// without being used at any later point, then the value is alive in the
437 /// interval ]0,1[. This interval cannot be represented by an integer set, as
438 /// it does not contain any integer point. Zones allow us to represent this
439 /// interval and can be converted to sets of timepoints when needed (e.g., in
440 /// isConflicting when comparing to the write sets).
441 /// @see convertZoneToTimepoints and this file's comment for more details.
442 IslPtr
<isl_union_set
> Occupied
;
444 /// { [Element[] -> Zone[]] }
445 /// Set of array elements when they are not alive, i.e. their memory can be
446 /// used for other purposed. Can contain a nullptr; in this case the set is
447 /// implicitly defined as the complement of #Occupied.
448 IslPtr
<isl_union_set
> Unused
;
450 /// { [Element[] -> Scatter[]] }
451 /// The write actions currently in the scop or that would be added when
452 /// mapping a scalar.
453 IslPtr
<isl_union_set
> Written
;
455 /// Check whether this Knowledge object is well-formed.
456 void checkConsistency() const {
458 // Default-initialized object
459 if (!Occupied
&& !Unused
&& !Written
)
462 assert(Occupied
|| Unused
);
465 // If not all fields are defined, we cannot derived the universe.
466 if (!Occupied
|| !Unused
)
469 assert(isl_union_set_is_disjoint(Occupied
.keep(), Unused
.keep()) ==
471 auto Universe
= give(isl_union_set_union(Occupied
.copy(), Unused
.copy()));
472 assert(isl_union_set_is_subset(Written
.keep(), Universe
.keep()) ==
478 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do
479 /// not use such an object.
482 /// Create a new object with the given members.
483 Knowledge(IslPtr
<isl_union_set
> Occupied
, IslPtr
<isl_union_set
> Unused
,
484 IslPtr
<isl_union_set
> Written
)
485 : Occupied(std::move(Occupied
)), Unused(std::move(Unused
)),
486 Written(std::move(Written
)) {
490 /// Alternative constructor taking isl_sets instead isl_union_sets.
491 Knowledge(IslPtr
<isl_set
> Occupied
, IslPtr
<isl_set
> Unused
,
492 IslPtr
<isl_set
> Written
)
493 : Knowledge(give(isl_union_set_from_set(Occupied
.take())),
494 give(isl_union_set_from_set(Unused
.take())),
495 give(isl_union_set_from_set(Written
.take()))) {}
497 /// Return whether this object was not default-constructed.
498 bool isUsable() const { return (Occupied
|| Unused
) && Written
; }
500 /// Print the content of this object to @p OS.
501 void print(llvm::raw_ostream
&OS
, unsigned Indent
= 0) const {
504 OS
.indent(Indent
) << "Occupied: " << Occupied
<< "\n";
506 OS
.indent(Indent
) << "Occupied: <Everything else not in Unused>\n";
508 OS
.indent(Indent
) << "Unused: " << Unused
<< "\n";
510 OS
.indent(Indent
) << "Unused: <Everything else not in Occupied>\n";
511 OS
.indent(Indent
) << "Written : " << Written
<< '\n';
513 OS
.indent(Indent
) << "Invalid knowledge\n";
517 /// Combine two knowledges, this and @p That.
518 void learnFrom(Knowledge That
) {
519 assert(!isConflicting(*this, That
));
520 assert(Unused
&& That
.Occupied
);
523 "This function is only prepared to learn occupied elements from That");
524 assert(!Occupied
&& "This function does not implement "
526 "give(isl_union_set_union(this->Occupied.take(), "
527 "That.Occupied.copy()));`");
529 Unused
= give(isl_union_set_subtract(Unused
.take(), That
.Occupied
.copy()));
530 Written
= give(isl_union_set_union(Written
.take(), That
.Written
.take()));
535 /// Determine whether two Knowledges conflict with each other.
537 /// In theory @p Existing and @p Proposed are symmetric, but the
538 /// implementation is constrained by the implicit interpretation. That is, @p
539 /// Existing must have #Unused defined (use case 1) and @p Proposed must have
540 /// #Occupied defined (use case 1).
542 /// A conflict is defined as non-preserved semantics when they are merged. For
543 /// instance, when for the same array and zone they assume different
546 /// @param Existing One of the knowledges with #Unused defined.
547 /// @param Proposed One of the knowledges with #Occupied defined.
548 /// @param OS Dump the conflict reason to this output stream; use
549 /// nullptr to not output anything.
550 /// @param Indent Indention for the conflict reason.
552 /// @return True, iff the two knowledges are conflicting.
553 static bool isConflicting(const Knowledge
&Existing
,
554 const Knowledge
&Proposed
,
555 llvm::raw_ostream
*OS
= nullptr,
556 unsigned Indent
= 0) {
557 assert(Existing
.Unused
);
558 assert(Proposed
.Occupied
);
561 if (Existing
.Occupied
&& Proposed
.Unused
) {
562 auto ExistingUniverse
= give(isl_union_set_union(Existing
.Occupied
.copy(),
563 Existing
.Unused
.copy()));
564 auto ProposedUniverse
= give(isl_union_set_union(Proposed
.Occupied
.copy(),
565 Proposed
.Unused
.copy()));
566 assert(isl_union_set_is_equal(ExistingUniverse
.keep(),
567 ProposedUniverse
.keep()) == isl_bool_true
&&
568 "Both inputs' Knowledges must be over the same universe");
572 // Are the new lifetimes required for Proposed unused in Existing?
573 if (isl_union_set_is_subset(Proposed
.Occupied
.keep(),
574 Existing
.Unused
.keep()) != isl_bool_true
) {
576 auto ConflictingLifetimes
= give(isl_union_set_subtract(
577 Proposed
.Occupied
.copy(), Existing
.Unused
.copy()));
578 OS
->indent(Indent
) << "Proposed lifetimes are not unused in existing\n";
579 OS
->indent(Indent
) << "Conflicting lifetimes: " << ConflictingLifetimes
585 // Do the writes in Existing only overwrite unused values in Proposed?
586 // We convert here the set of lifetimes to actual timepoints. A lifetime is
587 // in conflict with a set of write timepoints, if either a live timepoint is
588 // clearly within the lifetime or if a write happens at the beginning of the
589 // lifetime (where it would conflict with the value that actually writes the
590 // value alive). There is no conflict at the end of a lifetime, as the alive
591 // value will always be read, before it is overwritten again. The last
592 // property holds in Polly for all scalar values and we expect all users of
593 // Knowledge to check this property also for accesses to MemoryKind::Array.
594 auto ProposedFixedDefs
=
595 convertZoneToTimepoints(Proposed
.Occupied
, true, false);
596 if (isl_union_set_is_disjoint(Existing
.Written
.keep(),
597 ProposedFixedDefs
.keep()) != isl_bool_true
) {
599 auto ConflictingWrites
= give(isl_union_set_intersect(
600 Existing
.Written
.copy(), ProposedFixedDefs
.copy()));
601 OS
->indent(Indent
) << "Proposed writes into range used by existing\n";
602 OS
->indent(Indent
) << "Conflicting writes: " << ConflictingWrites
608 // Do the new writes in Proposed only overwrite unused values in Existing?
609 auto ExistingAvailableDefs
=
610 convertZoneToTimepoints(Existing
.Unused
, true, false);
611 if (isl_union_set_is_subset(Proposed
.Written
.keep(),
612 ExistingAvailableDefs
.keep()) !=
615 auto ConflictingWrites
= give(isl_union_set_subtract(
616 Proposed
.Written
.copy(), ExistingAvailableDefs
.copy()));
618 << "Proposed a lifetime where there is an Existing write into it\n";
619 OS
->indent(Indent
) << "Conflicting writes: " << ConflictingWrites
625 // Does Proposed write at the same time as Existing already does (order of
626 // writes is undefined)?
627 if (isl_union_set_is_disjoint(Existing
.Written
.keep(),
628 Proposed
.Written
.keep()) != isl_bool_true
) {
630 auto ConflictingWrites
= give(isl_union_set_intersect(
631 Existing
.Written
.copy(), Proposed
.Written
.copy()));
632 OS
->indent(Indent
) << "Proposed writes at the same time as an already "
634 OS
->indent(Indent
) << "Conflicting writes: " << ConflictingWrites
644 std::string
printIntruction(Instruction
*Instr
, bool IsForDebug
= false) {
646 raw_string_ostream
OS(Result
);
647 Instr
->print(OS
, IsForDebug
);
650 while (i
< Result
.size() && Result
[i
] == ' ')
652 return Result
.substr(i
);
655 /// Base class for algorithms based on zones, like DeLICM.
656 class ZoneAlgorithm
{
658 /// Hold a reference to the isl_ctx to avoid it being freed before we released
659 /// all of the isl objects.
661 /// This must be declared before any other member that holds an isl object.
662 /// This guarantees that the shared_ptr and its isl_ctx is destructed last,
663 /// after all other members free'd the isl objects they were holding.
664 std::shared_ptr
<isl_ctx
> IslCtx
;
666 /// Cached reaching definitions for each ScopStmt.
668 /// Use getScalarReachingDefinition() to get its contents.
669 DenseMap
<ScopStmt
*, IslPtr
<isl_map
>> ScalarReachDefZone
;
671 /// The analyzed Scop.
674 /// Parameter space that does not need realignment.
675 IslPtr
<isl_space
> ParamSpace
;
677 /// Space the schedule maps to.
678 IslPtr
<isl_space
> ScatterSpace
;
680 /// Cached version of the schedule and domains.
681 IslPtr
<isl_union_map
> Schedule
;
683 /// Set of all referenced elements.
684 /// { Element[] -> Element[] }
685 IslPtr
<isl_union_set
> AllElements
;
687 /// Combined access relations of all MemoryKind::Array READ accesses.
688 /// { DomainRead[] -> Element[] }
689 IslPtr
<isl_union_map
> AllReads
;
691 /// Combined access relations of all MemoryKind::Array, MAY_WRITE accesses.
692 /// { DomainMayWrite[] -> Element[] }
693 IslPtr
<isl_union_map
> AllMayWrites
;
695 /// Combined access relations of all MemoryKind::Array, MUST_WRITE accesses.
696 /// { DomainMustWrite[] -> Element[] }
697 IslPtr
<isl_union_map
> AllMustWrites
;
699 /// Prepare the object before computing the zones of @p S.
700 ZoneAlgorithm(Scop
*S
)
701 : IslCtx(S
->getSharedIslCtx()), S(S
), Schedule(give(S
->getSchedule())) {
703 auto Domains
= give(S
->getDomains());
706 give(isl_union_map_intersect_domain(Schedule
.take(), Domains
.take()));
707 ParamSpace
= give(isl_union_map_get_space(Schedule
.keep()));
708 ScatterSpace
= getScatterSpace(Schedule
);
712 /// Check whether @p Stmt can be accurately analyzed by zones.
714 /// What violates our assumptions:
715 /// - A load after a write of the same location; we assume that all reads
716 /// occur before the writes.
717 /// - Two writes to the same location; we cannot model the order in which
720 /// Scalar reads implicitly always occur before other accesses therefore never
721 /// violate the first condition. There is also at most one write to a scalar,
722 /// satisfying the second condition.
723 bool isCompatibleStmt(ScopStmt
*Stmt
) {
724 auto Stores
= makeEmptyUnionMap();
725 auto Loads
= makeEmptyUnionMap();
727 // This assumes that the MemoryKind::Array MemoryAccesses are iterated in
729 for (auto *MA
: *Stmt
) {
730 if (!MA
->isLatestArrayKind())
734 give(isl_union_map_from_map(getAccessRelationFor(MA
).take()));
737 // Reject load after store to same location.
738 if (!isl_union_map_is_disjoint(Stores
.keep(), AccRel
.keep())) {
739 OptimizationRemarkMissed
R(DEBUG_TYPE
, "LoadAfterStore",
740 MA
->getAccessInstruction());
741 R
<< "load after store of same element in same statement";
742 R
<< " (previous stores: " << Stores
;
743 R
<< ", loading: " << AccRel
<< ")";
744 S
->getFunction().getContext().diagnose(R
);
748 Loads
= give(isl_union_map_union(Loads
.take(), AccRel
.take()));
753 if (!isa
<StoreInst
>(MA
->getAccessInstruction())) {
754 DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n");
755 OptimizationRemarkMissed
R(DEBUG_TYPE
, "UnusualStore",
756 MA
->getAccessInstruction());
757 R
<< "encountered write that is not a StoreInst: "
758 << printIntruction(MA
->getAccessInstruction());
759 S
->getFunction().getContext().diagnose(R
);
763 // In region statements the order is less clear, eg. the load and store
764 // might be in a boxed loop.
765 if (Stmt
->isRegionStmt() &&
766 !isl_union_map_is_disjoint(Loads
.keep(), AccRel
.keep())) {
767 OptimizationRemarkMissed
R(DEBUG_TYPE
, "StoreInSubregion",
768 MA
->getAccessInstruction());
769 R
<< "store is in a non-affine subregion";
770 S
->getFunction().getContext().diagnose(R
);
774 // Do not allow more than one store to the same location.
775 if (!isl_union_map_is_disjoint(Stores
.keep(), AccRel
.keep())) {
776 OptimizationRemarkMissed
R(DEBUG_TYPE
, "StoreAfterStore",
777 MA
->getAccessInstruction());
778 R
<< "store after store of same element in same statement";
779 R
<< " (previous stores: " << Stores
;
780 R
<< ", storing: " << AccRel
<< ")";
781 S
->getFunction().getContext().diagnose(R
);
785 Stores
= give(isl_union_map_union(Stores
.take(), AccRel
.take()));
791 void addArrayReadAccess(MemoryAccess
*MA
) {
792 assert(MA
->isLatestArrayKind());
793 assert(MA
->isRead());
795 // { DomainRead[] -> Element[] }
796 auto AccRel
= getAccessRelationFor(MA
);
797 AllReads
= give(isl_union_map_add_map(AllReads
.take(), AccRel
.copy()));
800 void addArrayWriteAccess(MemoryAccess
*MA
) {
801 assert(MA
->isLatestArrayKind());
802 assert(MA
->isWrite());
804 // { Domain[] -> Element[] }
805 auto AccRel
= getAccessRelationFor(MA
);
807 if (MA
->isMustWrite())
809 give(isl_union_map_add_map(AllMustWrites
.take(), AccRel
.copy()));
811 if (MA
->isMayWrite())
813 give(isl_union_map_add_map(AllMayWrites
.take(), AccRel
.copy()));
817 IslPtr
<isl_union_set
> makeEmptyUnionSet() {
818 return give(isl_union_set_empty(ParamSpace
.copy()));
821 IslPtr
<isl_union_map
> makeEmptyUnionMap() {
822 return give(isl_union_map_empty(ParamSpace
.copy()));
825 /// Check whether @p S can be accurately analyzed by zones.
826 bool isCompatibleScop() {
827 for (auto &Stmt
: *S
) {
828 if (!isCompatibleStmt(&Stmt
))
834 /// Get the schedule for @p Stmt.
836 /// The domain of the result is as narrow as possible.
837 IslPtr
<isl_map
> getScatterFor(ScopStmt
*Stmt
) const {
838 auto ResultSpace
= give(isl_space_map_from_domain_and_range(
839 Stmt
->getDomainSpace(), ScatterSpace
.copy()));
840 return give(isl_union_map_extract_map(Schedule
.keep(), ResultSpace
.take()));
843 /// Get the schedule of @p MA's parent statement.
844 IslPtr
<isl_map
> getScatterFor(MemoryAccess
*MA
) const {
845 return getScatterFor(MA
->getStatement());
848 /// Get the schedule for the statement instances of @p Domain.
849 IslPtr
<isl_union_map
> getScatterFor(IslPtr
<isl_union_set
> Domain
) const {
850 return give(isl_union_map_intersect_domain(Schedule
.copy(), Domain
.take()));
853 /// Get the schedule for the statement instances of @p Domain.
854 IslPtr
<isl_map
> getScatterFor(IslPtr
<isl_set
> Domain
) const {
855 auto ResultSpace
= give(isl_space_map_from_domain_and_range(
856 isl_set_get_space(Domain
.keep()), ScatterSpace
.copy()));
857 auto UDomain
= give(isl_union_set_from_set(Domain
.copy()));
858 auto UResult
= getScatterFor(std::move(UDomain
));
859 auto Result
= singleton(std::move(UResult
), std::move(ResultSpace
));
860 assert(isl_set_is_equal(give(isl_map_domain(Result
.copy())).keep(),
861 Domain
.keep()) == isl_bool_true
);
865 /// Get the domain of @p Stmt.
866 IslPtr
<isl_set
> getDomainFor(ScopStmt
*Stmt
) const {
867 return give(Stmt
->getDomain());
870 /// Get the domain @p MA's parent statement.
871 IslPtr
<isl_set
> getDomainFor(MemoryAccess
*MA
) const {
872 return getDomainFor(MA
->getStatement());
875 /// Get the access relation of @p MA.
877 /// The domain of the result is as narrow as possible.
878 IslPtr
<isl_map
> getAccessRelationFor(MemoryAccess
*MA
) const {
879 auto Domain
= getDomainFor(MA
);
880 auto AccRel
= give(MA
->getLatestAccessRelation());
881 return give(isl_map_intersect_domain(AccRel
.take(), Domain
.take()));
884 /// Get the reaching definition of a scalar defined in @p Stmt.
886 /// Note that this does not depend on the llvm::Instruction, only on the
887 /// statement it is defined in. Therefore the same computation can be reused.
889 /// @param Stmt The statement in which a scalar is defined.
891 /// @return { Scatter[] -> DomainDef[] }
892 IslPtr
<isl_map
> getScalarReachingDefinition(ScopStmt
*Stmt
) {
893 auto &Result
= ScalarReachDefZone
[Stmt
];
897 auto Domain
= getDomainFor(Stmt
);
898 Result
= computeScalarReachingDefinition(Schedule
, Domain
, false, true);
905 /// Compute the different zones.
906 void computeCommon() {
907 AllReads
= makeEmptyUnionMap();
908 AllMayWrites
= makeEmptyUnionMap();
909 AllMustWrites
= makeEmptyUnionMap();
911 for (auto &Stmt
: *S
) {
912 for (auto *MA
: Stmt
) {
913 if (!MA
->isLatestArrayKind())
917 addArrayReadAccess(MA
);
920 addArrayWriteAccess(MA
);
924 // { DomainWrite[] -> Element[] }
926 give(isl_union_map_union(AllMustWrites
.copy(), AllMayWrites
.copy()));
929 AllElements
= makeEmptyUnionSet();
930 foreachElt(AllWrites
, [this](IslPtr
<isl_map
> Write
) {
931 auto Space
= give(isl_map_get_space(Write
.keep()));
932 auto EltSpace
= give(isl_space_range(Space
.take()));
933 auto EltUniv
= give(isl_set_universe(EltSpace
.take()));
935 give(isl_union_set_add_set(AllElements
.take(), EltUniv
.take()));
939 /// Print the current state of all MemoryAccesses to @p.
940 void printAccesses(llvm::raw_ostream
&OS
, int Indent
= 0) const {
941 OS
.indent(Indent
) << "After accesses {\n";
942 for (auto &Stmt
: *S
) {
943 OS
.indent(Indent
+ 4) << Stmt
.getBaseName() << "\n";
944 for (auto *MA
: Stmt
)
947 OS
.indent(Indent
) << "}\n";
951 /// Return the SCoP this object is analyzing.
952 Scop
*getScop() const { return S
; }
955 /// Implementation of the DeLICM/DePRE transformation.
956 class DeLICMImpl
: public ZoneAlgorithm
{
958 /// Knowledge before any transformation took place.
959 Knowledge OriginalZone
;
961 /// Current knowledge of the SCoP including all already applied
965 ScalarDefUseChains DefUse
;
967 /// Determine whether two knowledges are conflicting with each other.
969 /// @see Knowledge::isConflicting
970 bool isConflicting(const Knowledge
&Proposed
) {
971 raw_ostream
*OS
= nullptr;
972 DEBUG(OS
= &llvm::dbgs());
973 return Knowledge::isConflicting(Zone
, Proposed
, OS
, 4);
976 /// Determine whether @p SAI is a scalar that can be mapped to an array
978 bool isMappable(const ScopArrayInfo
*SAI
) {
981 if (SAI
->isValueKind()) {
982 auto *MA
= DefUse
.getValueDef(SAI
);
985 << " Reject because value is read-only within the scop\n");
989 // Mapping if value is used after scop is not supported. The code
990 // generator would need to reload the scalar after the scop, but it
991 // does not have the information to where it is mapped to. Only the
992 // MemoryAccesses have that information, not the ScopArrayInfo.
993 auto Inst
= MA
->getAccessInstruction();
994 for (auto User
: Inst
->users()) {
995 if (!isa
<Instruction
>(User
))
997 auto UserInst
= cast
<Instruction
>(User
);
999 if (!S
->contains(UserInst
)) {
1000 DEBUG(dbgs() << " Reject because value is escaping\n");
1008 if (SAI
->isPHIKind()) {
1009 auto *MA
= DefUse
.getPHIRead(SAI
);
1012 // Mapping of an incoming block from before the SCoP is not supported by
1013 // the code generator.
1014 auto PHI
= cast
<PHINode
>(MA
->getAccessInstruction());
1015 for (auto Incoming
: PHI
->blocks()) {
1016 if (!S
->contains(Incoming
)) {
1017 DEBUG(dbgs() << " Reject because at least one incoming block is "
1018 "not in the scop region\n");
1026 DEBUG(dbgs() << " Reject ExitPHI or other non-value\n");
1030 /// Compute the uses of a MemoryKind::Value and its lifetime (from its
1031 /// definition to the last use).
1033 /// @param SAI The ScopArrayInfo representing the value's storage.
1035 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] }
1036 /// First element is the set of uses for each definition.
1037 /// The second is the lifetime of each definition.
1038 std::tuple
<IslPtr
<isl_union_map
>, IslPtr
<isl_map
>>
1039 computeValueUses(const ScopArrayInfo
*SAI
) {
1040 assert(SAI
->isValueKind());
1043 auto Reads
= makeEmptyUnionSet();
1046 for (auto *MA
: DefUse
.getValueUses(SAI
))
1048 give(isl_union_set_add_set(Reads
.take(), getDomainFor(MA
).take()));
1050 // { DomainRead[] -> Scatter[] }
1051 auto ReadSchedule
= getScatterFor(Reads
);
1053 auto *DefMA
= DefUse
.getValueDef(SAI
);
1057 auto Writes
= getDomainFor(DefMA
);
1059 // { DomainDef[] -> Scatter[] }
1060 auto WriteScatter
= getScatterFor(Writes
);
1062 // { Scatter[] -> DomainDef[] }
1063 auto ReachDef
= getScalarReachingDefinition(DefMA
->getStatement());
1065 // { [DomainDef[] -> Scatter[]] -> DomainUse[] }
1067 isl_union_map_apply_range(isl_union_map_from_map(isl_map_range_map(
1068 isl_map_reverse(ReachDef
.take()))),
1069 isl_union_map_reverse(ReadSchedule
.take())));
1071 // { DomainDef[] -> Scatter[] }
1073 singleton(give(isl_union_set_unwrap(isl_union_map_domain(Uses
.copy()))),
1074 give(isl_space_map_from_domain_and_range(
1075 isl_set_get_space(Writes
.keep()), ScatterSpace
.copy())));
1077 // { DomainDef[] -> Zone[] }
1078 auto Lifetime
= betweenScatter(WriteScatter
, UseScatter
, false, true);
1080 // { DomainDef[] -> DomainRead[] }
1081 auto DefUses
= give(isl_union_map_domain_factor_domain(Uses
.take()));
1083 return std::make_pair(DefUses
, Lifetime
);
1086 /// For each 'execution' of a PHINode, get the incoming block that was
1087 /// executed before.
1089 /// For each PHI instance we can directly determine which was the incoming
1090 /// block, and hence derive which value the PHI has.
1092 /// @param SAI The ScopArrayInfo representing the PHI's storage.
1094 /// @return { DomainPHIRead[] -> DomainPHIWrite[] }
1095 IslPtr
<isl_union_map
> computePerPHI(const ScopArrayInfo
*SAI
) {
1096 assert(SAI
->isPHIKind());
1098 // { DomainPHIWrite[] -> Scatter[] }
1099 auto PHIWriteScatter
= makeEmptyUnionMap();
1101 // Collect all incoming block timepoint.
1102 for (auto *MA
: DefUse
.getPHIIncomings(SAI
)) {
1103 auto Scatter
= getScatterFor(MA
);
1105 give(isl_union_map_add_map(PHIWriteScatter
.take(), Scatter
.take()));
1108 // { DomainPHIRead[] -> Scatter[] }
1109 auto PHIReadScatter
= getScatterFor(DefUse
.getPHIRead(SAI
));
1111 // { DomainPHIRead[] -> Scatter[] }
1112 auto BeforeRead
= beforeScatter(PHIReadScatter
, true);
1115 auto WriteTimes
= singleton(
1116 give(isl_union_map_range(PHIWriteScatter
.copy())), ScatterSpace
);
1118 // { DomainPHIRead[] -> Scatter[] }
1119 auto PHIWriteTimes
=
1120 give(isl_map_intersect_range(BeforeRead
.take(), WriteTimes
.take()));
1121 auto LastPerPHIWrites
= give(isl_map_lexmax(PHIWriteTimes
.take()));
1123 // { DomainPHIRead[] -> DomainPHIWrite[] }
1124 auto Result
= give(isl_union_map_apply_range(
1125 isl_union_map_from_map(LastPerPHIWrites
.take()),
1126 isl_union_map_reverse(PHIWriteScatter
.take())));
1127 assert(isl_union_map_is_single_valued(Result
.keep()) == isl_bool_true
);
1128 assert(isl_union_map_is_injective(Result
.keep()) == isl_bool_true
);
1132 /// Try to map a MemoryKind::Value to a given array element.
1134 /// @param SAI Representation of the scalar's memory to map.
1135 /// @param TargetElt { Scatter[] -> Element[] }
1136 /// Suggestion where to map a scalar to when at a timepoint.
1138 /// @return true if the scalar was successfully mapped.
1139 bool tryMapValue(const ScopArrayInfo
*SAI
, IslPtr
<isl_map
> TargetElt
) {
1140 assert(SAI
->isValueKind());
1142 auto *DefMA
= DefUse
.getValueDef(SAI
);
1143 assert(DefMA
->isValueKind());
1144 assert(DefMA
->isMustWrite());
1146 // Stop if the scalar has already been mapped.
1147 if (!DefMA
->getLatestScopArrayInfo()->isValueKind())
1150 // { DomainDef[] -> Scatter[] }
1151 auto DefSched
= getScatterFor(DefMA
);
1153 // Where each write is mapped to, according to the suggestion.
1154 // { DomainDef[] -> Element[] }
1155 auto DefTarget
= give(isl_map_apply_domain(
1156 TargetElt
.copy(), isl_map_reverse(DefSched
.copy())));
1157 simplify(DefTarget
);
1158 DEBUG(dbgs() << " Def Mapping: " << DefTarget
<< '\n');
1160 auto OrigDomain
= getDomainFor(DefMA
);
1161 auto MappedDomain
= give(isl_map_domain(DefTarget
.copy()));
1162 if (!isl_set_is_subset(OrigDomain
.keep(), MappedDomain
.keep())) {
1164 << " Reject because mapping does not encompass all instances\n");
1168 // { DomainDef[] -> Zone[] }
1169 IslPtr
<isl_map
> Lifetime
;
1171 // { DomainDef[] -> DomainUse[] }
1172 IslPtr
<isl_union_map
> DefUses
;
1174 std::tie(DefUses
, Lifetime
) = computeValueUses(SAI
);
1175 DEBUG(dbgs() << " Lifetime: " << Lifetime
<< '\n');
1177 /// { [Element[] -> Zone[]] }
1178 auto EltZone
= give(
1179 isl_map_wrap(isl_map_apply_domain(Lifetime
.copy(), DefTarget
.copy())));
1182 // { [Element[] -> Scatter[]] }
1183 auto DefEltSched
= give(isl_map_wrap(isl_map_reverse(
1184 isl_map_apply_domain(DefTarget
.copy(), DefSched
.copy()))));
1185 simplify(DefEltSched
);
1187 Knowledge
Proposed(EltZone
, nullptr, DefEltSched
);
1188 if (isConflicting(Proposed
))
1191 // { DomainUse[] -> Element[] }
1192 auto UseTarget
= give(
1193 isl_union_map_apply_range(isl_union_map_reverse(DefUses
.take()),
1194 isl_union_map_from_map(DefTarget
.copy())));
1196 mapValue(SAI
, std::move(DefTarget
), std::move(UseTarget
),
1197 std::move(Lifetime
), std::move(Proposed
));
1201 /// After a scalar has been mapped, update the global knowledge.
1202 void applyLifetime(Knowledge Proposed
) {
1203 Zone
.learnFrom(std::move(Proposed
));
1206 /// Map a MemoryKind::Value scalar to an array element.
1208 /// Callers must have ensured that the mapping is valid and not conflicting.
1210 /// @param SAI The ScopArrayInfo representing the scalar's memory to
1212 /// @param DefTarget { DomainDef[] -> Element[] }
1213 /// The array element to map the scalar to.
1214 /// @param UseTarget { DomainUse[] -> Element[] }
1215 /// The array elements the uses are mapped to.
1216 /// @param Lifetime { DomainDef[] -> Zone[] }
1217 /// The lifetime of each llvm::Value definition for
1219 /// @param Proposed Mapping constraints for reporting.
1220 void mapValue(const ScopArrayInfo
*SAI
, IslPtr
<isl_map
> DefTarget
,
1221 IslPtr
<isl_union_map
> UseTarget
, IslPtr
<isl_map
> Lifetime
,
1222 Knowledge Proposed
) {
1223 // Redirect the read accesses.
1224 for (auto *MA
: DefUse
.getValueUses(SAI
)) {
1226 auto Domain
= getDomainFor(MA
);
1228 // { DomainUse[] -> Element[] }
1229 auto NewAccRel
= give(isl_union_map_intersect_domain(
1230 UseTarget
.copy(), isl_union_set_from_set(Domain
.take())));
1231 simplify(NewAccRel
);
1233 assert(isl_union_map_n_map(NewAccRel
.keep()) == 1);
1234 MA
->setNewAccessRelation(isl_map_from_union_map(NewAccRel
.take()));
1237 auto *WA
= DefUse
.getValueDef(SAI
);
1238 WA
->setNewAccessRelation(DefTarget
.copy());
1239 applyLifetime(Proposed
);
1241 MappedValueScalars
++;
1244 /// Try to map a MemoryKind::PHI scalar to a given array element.
1246 /// @param SAI Representation of the scalar's memory to map.
1247 /// @param TargetElt { Scatter[] -> Element[] }
1248 /// Suggestion where to map the scalar to when at a
1251 /// @return true if the PHI scalar has been mapped.
1252 bool tryMapPHI(const ScopArrayInfo
*SAI
, IslPtr
<isl_map
> TargetElt
) {
1253 auto *PHIRead
= DefUse
.getPHIRead(SAI
);
1254 assert(PHIRead
->isPHIKind());
1255 assert(PHIRead
->isRead());
1257 // Skip if already been mapped.
1258 if (!PHIRead
->getLatestScopArrayInfo()->isPHIKind())
1261 // { DomainRead[] -> Scatter[] }
1262 auto PHISched
= getScatterFor(PHIRead
);
1264 // { DomainRead[] -> Element[] }
1266 give(isl_map_apply_range(PHISched
.copy(), TargetElt
.copy()));
1267 simplify(PHITarget
);
1268 DEBUG(dbgs() << " Mapping: " << PHITarget
<< '\n');
1270 auto OrigDomain
= getDomainFor(PHIRead
);
1271 auto MappedDomain
= give(isl_map_domain(PHITarget
.copy()));
1272 if (!isl_set_is_subset(OrigDomain
.keep(), MappedDomain
.keep())) {
1274 << " Reject because mapping does not encompass all instances\n");
1278 // { DomainRead[] -> DomainWrite[] }
1279 auto PerPHIWrites
= computePerPHI(SAI
);
1281 // { DomainWrite[] -> Element[] }
1282 auto WritesTarget
= give(isl_union_map_reverse(isl_union_map_apply_domain(
1283 PerPHIWrites
.copy(), isl_union_map_from_map(PHITarget
.copy()))));
1284 simplify(WritesTarget
);
1286 // { DomainWrite[] }
1287 auto ExpandedWritesDom
= give(isl_union_map_domain(WritesTarget
.copy()));
1288 auto UniverseWritesDom
= give(isl_union_set_empty(ParamSpace
.copy()));
1290 for (auto *MA
: DefUse
.getPHIIncomings(SAI
))
1291 UniverseWritesDom
= give(isl_union_set_add_set(UniverseWritesDom
.take(),
1292 getDomainFor(MA
).take()));
1294 if (!isl_union_set_is_subset(UniverseWritesDom
.keep(),
1295 ExpandedWritesDom
.keep())) {
1296 DEBUG(dbgs() << " Reject because did not find PHI write mapping for "
1298 DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget
<< '\n');
1299 DEBUG(dbgs() << " Missing instances: "
1300 << give(isl_union_set_subtract(UniverseWritesDom
.copy(),
1301 ExpandedWritesDom
.copy()))
1306 // { DomainRead[] -> Scatter[] }
1307 auto PerPHIWriteScatter
= give(isl_map_from_union_map(
1308 isl_union_map_apply_range(PerPHIWrites
.copy(), Schedule
.copy())));
1310 // { DomainRead[] -> Zone[] }
1311 auto Lifetime
= betweenScatter(PerPHIWriteScatter
, PHISched
, false, true);
1313 DEBUG(dbgs() << " Lifetime: " << Lifetime
<< "\n");
1315 // { DomainWrite[] -> Zone[] }
1316 auto WriteLifetime
= give(isl_union_map_apply_domain(
1317 isl_union_map_from_map(Lifetime
.copy()), PerPHIWrites
.copy()));
1319 // { DomainWrite[] -> [Element[] -> Scatter[]] }
1320 auto WrittenTranslator
=
1321 give(isl_union_map_range_product(WritesTarget
.copy(), Schedule
.copy()));
1323 // { [Element[] -> Scatter[]] }
1324 auto Written
= give(isl_union_map_range(WrittenTranslator
.copy()));
1327 // { DomainWrite[] -> [Element[] -> Zone[]] }
1328 auto LifetimeTranslator
= give(
1329 isl_union_map_range_product(WritesTarget
.copy(), WriteLifetime
.take()));
1331 // { [Element[] -> Zone[] }
1332 auto Occupied
= give(isl_union_map_range(LifetimeTranslator
.copy()));
1335 Knowledge
Proposed(Occupied
, nullptr, Written
);
1336 if (isConflicting(Proposed
))
1339 mapPHI(SAI
, std::move(PHITarget
), std::move(WritesTarget
),
1340 std::move(Lifetime
), std::move(Proposed
));
1344 /// Map a MemoryKind::PHI scalar to an array element.
1346 /// Callers must have ensured that the mapping is valid and not conflicting
1347 /// with the common knowledge.
1349 /// @param SAI The ScopArrayInfo representing the scalar's memory to
1351 /// @param ReadTarget { DomainRead[] -> Element[] }
1352 /// The array element to map the scalar to.
1353 /// @param WriteTarget { DomainWrite[] -> Element[] }
1354 /// New access target for each PHI incoming write.
1355 /// @param Lifetime { DomainRead[] -> Zone[] }
1356 /// The lifetime of each PHI for reporting.
1357 /// @param Proposed Mapping constraints for reporting.
1358 void mapPHI(const ScopArrayInfo
*SAI
, IslPtr
<isl_map
> ReadTarget
,
1359 IslPtr
<isl_union_map
> WriteTarget
, IslPtr
<isl_map
> Lifetime
,
1360 Knowledge Proposed
) {
1361 // Redirect the PHI incoming writes.
1362 for (auto *MA
: DefUse
.getPHIIncomings(SAI
)) {
1363 // { DomainWrite[] }
1364 auto Domain
= getDomainFor(MA
);
1366 // { DomainWrite[] -> Element[] }
1367 auto NewAccRel
= give(isl_union_map_intersect_domain(
1368 WriteTarget
.copy(), isl_union_set_from_set(Domain
.take())));
1369 simplify(NewAccRel
);
1371 assert(isl_union_map_n_map(NewAccRel
.keep()) == 1);
1372 MA
->setNewAccessRelation(isl_map_from_union_map(NewAccRel
.take()));
1375 // Redirect the PHI read.
1376 auto *PHIRead
= DefUse
.getPHIRead(SAI
);
1377 PHIRead
->setNewAccessRelation(ReadTarget
.copy());
1378 applyLifetime(Proposed
);
1383 /// Search and map scalars to memory overwritten by @p TargetStoreMA.
1385 /// Start trying to map scalars that are used in the same statement as the
1386 /// store. For every successful mapping, try to also map scalars of the
1387 /// statements where those are written. Repeat, until no more mapping
1388 /// opportunity is found.
1390 /// There is currently no preference in which order scalars are tried.
1391 /// Ideally, we would direct it towards a load instruction of the same array
1393 bool collapseScalarsToStore(MemoryAccess
*TargetStoreMA
) {
1394 assert(TargetStoreMA
->isLatestArrayKind());
1395 assert(TargetStoreMA
->isMustWrite());
1397 auto TargetStmt
= TargetStoreMA
->getStatement();
1400 auto TargetDom
= getDomainFor(TargetStmt
);
1402 // { DomTarget[] -> Element[] }
1403 auto TargetAccRel
= getAccessRelationFor(TargetStoreMA
);
1405 // { Zone[] -> DomTarget[] }
1406 // For each point in time, find the next target store instance.
1408 computeScalarReachingOverwrite(Schedule
, TargetDom
, false, true);
1410 // { Zone[] -> Element[] }
1411 // Use the target store's write location as a suggestion to map scalars to.
1413 give(isl_map_apply_range(Target
.take(), TargetAccRel
.take()));
1414 simplify(EltTarget
);
1415 DEBUG(dbgs() << " Target mapping is " << EltTarget
<< '\n');
1417 // Stack of elements not yet processed.
1418 SmallVector
<MemoryAccess
*, 16> Worklist
;
1420 // Set of scalars already tested.
1421 SmallPtrSet
<const ScopArrayInfo
*, 16> Closed
;
1423 // Lambda to add all scalar reads to the work list.
1424 auto ProcessAllIncoming
= [&](ScopStmt
*Stmt
) {
1425 for (auto *MA
: *Stmt
) {
1426 if (!MA
->isLatestScalarKind())
1431 Worklist
.push_back(MA
);
1435 // Add initial scalar. Either the value written by the store, or all inputs
1436 // of its statement.
1437 auto WrittenVal
= TargetStoreMA
->getAccessValue();
1438 if (auto InputAcc
= getInputAccessOf(WrittenVal
, TargetStmt
))
1439 Worklist
.push_back(InputAcc
);
1441 ProcessAllIncoming(TargetStmt
);
1443 auto AnyMapped
= false;
1445 S
->getRegion().getEntry()->getParent()->getParent()->getDataLayout();
1447 DL
.getTypeAllocSize(TargetStoreMA
->getAccessValue()->getType());
1449 while (!Worklist
.empty()) {
1450 auto *MA
= Worklist
.pop_back_val();
1452 auto *SAI
= MA
->getScopArrayInfo();
1453 if (Closed
.count(SAI
))
1456 DEBUG(dbgs() << "\n Trying to map " << MA
<< " (SAI: " << SAI
1459 // Skip non-mappable scalars.
1460 if (!isMappable(SAI
))
1463 auto MASize
= DL
.getTypeAllocSize(MA
->getAccessValue()->getType());
1464 if (MASize
> StoreSize
) {
1465 DEBUG(dbgs() << " Reject because storage size is insufficient\n");
1469 // Try to map MemoryKind::Value scalars.
1470 if (SAI
->isValueKind()) {
1471 if (!tryMapValue(SAI
, EltTarget
))
1474 auto *DefAcc
= DefUse
.getValueDef(SAI
);
1475 ProcessAllIncoming(DefAcc
->getStatement());
1481 // Try to map MemoryKind::PHI scalars.
1482 if (SAI
->isPHIKind()) {
1483 if (!tryMapPHI(SAI
, EltTarget
))
1485 // Add inputs of all incoming statements to the worklist.
1486 for (auto *PHIWrite
: DefUse
.getPHIIncomings(SAI
))
1487 ProcessAllIncoming(PHIWrite
->getStatement());
1499 /// Compute when an array element is unused.
1501 /// @return { [Element[] -> Zone[]] }
1502 IslPtr
<isl_union_set
> computeLifetime() const {
1503 // { Element[] -> Zone[] }
1504 auto ArrayUnused
= computeArrayUnused(Schedule
, AllMustWrites
, AllReads
,
1505 false, false, true);
1507 auto Result
= give(isl_union_map_wrap(ArrayUnused
.copy()));
1513 /// Determine when an array element is written to.
1515 /// @return { [Element[] -> Scatter[]] }
1516 IslPtr
<isl_union_set
> computeWritten() const {
1517 // { WriteDomain[] -> Element[] }
1519 give(isl_union_map_union(AllMustWrites
.copy(), AllMayWrites
.copy()));
1521 // { Scatter[] -> Element[] }
1522 auto WriteTimepoints
=
1523 give(isl_union_map_apply_domain(AllWrites
.copy(), Schedule
.copy()));
1526 give(isl_union_map_wrap(isl_union_map_reverse(WriteTimepoints
.copy())));
1532 /// Determine whether an access touches at most one element.
1534 /// The accessed element could be a scalar or accessing an array with constant
1535 /// subscript, such that all instances access only that element.
1537 /// @param MA The access to test.
1539 /// @return True, if zero or one elements are accessed; False if at least two
1540 /// different elements are accessed.
1541 bool isScalarAccess(MemoryAccess
*MA
) {
1542 auto Map
= getAccessRelationFor(MA
);
1543 auto Set
= give(isl_map_range(Map
.take()));
1544 return isl_set_is_singleton(Set
.keep()) == isl_bool_true
;
1548 DeLICMImpl(Scop
*S
) : ZoneAlgorithm(S
) {}
1550 /// Calculate the lifetime (definition to last use) of every array element.
1552 /// @return True if the computed lifetimes (#Zone) is usable.
1553 bool computeZone() {
1554 // Check that nothing strange occurs.
1555 if (!isCompatibleScop()) {
1556 DeLICMIncompatible
++;
1561 IslPtr
<isl_union_set
> EltUnused
, EltWritten
;
1564 IslMaxOperationsGuard
MaxOpGuard(IslCtx
.get(), DelicmMaxOps
);
1568 EltUnused
= computeLifetime();
1569 EltWritten
= computeWritten();
1572 if (isl_ctx_last_error(IslCtx
.get()) == isl_error_quota
) {
1574 DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n");
1575 DebugLoc Begin
, End
;
1576 getDebugLocations(getBBPairForRegion(&S
->getRegion()), Begin
, End
);
1577 OptimizationRemarkAnalysis
R(DEBUG_TYPE
, "OutOfQuota", Begin
,
1579 R
<< "maximal number of operations exceeded during zone analysis";
1580 S
->getFunction().getContext().diagnose(R
);
1584 OriginalZone
= Knowledge(nullptr, EltUnused
, EltWritten
);
1585 DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone
.print(dbgs(), 4));
1587 Zone
= OriginalZone
;
1589 return DelicmMaxOps
== 0 || Zone
.isUsable();
1592 /// Try to map as many scalars to unused array elements as possible.
1594 /// Multiple scalars might be mappable to intersecting unused array element
1595 /// zones, but we can only chose one. This is a greedy algorithm, therefore
1596 /// the first processed element claims it.
1597 void greedyCollapse() {
1598 bool Modified
= false;
1600 for (auto &Stmt
: *S
) {
1601 for (auto *MA
: Stmt
) {
1602 if (!MA
->isLatestArrayKind())
1607 if (MA
->isMayWrite()) {
1608 DEBUG(dbgs() << "Access " << MA
1609 << " pruned because it is a MAY_WRITE\n");
1610 OptimizationRemarkMissed
R(DEBUG_TYPE
, "TargetMayWrite",
1611 MA
->getAccessInstruction());
1612 R
<< "Skipped possible mapping target because it is not an "
1613 "unconditional overwrite";
1614 S
->getFunction().getContext().diagnose(R
);
1618 if (Stmt
.getNumIterators() == 0) {
1619 DEBUG(dbgs() << "Access " << MA
1620 << " pruned because it is not in a loop\n");
1621 OptimizationRemarkMissed
R(DEBUG_TYPE
, "WriteNotInLoop",
1622 MA
->getAccessInstruction());
1623 R
<< "skipped possible mapping target because it is not in a loop";
1624 S
->getFunction().getContext().diagnose(R
);
1628 if (isScalarAccess(MA
)) {
1629 DEBUG(dbgs() << "Access " << MA
1630 << " pruned because it writes only a single element\n");
1631 OptimizationRemarkMissed
R(DEBUG_TYPE
, "ScalarWrite",
1632 MA
->getAccessInstruction());
1633 R
<< "skipped possible mapping target because the memory location "
1634 "written to does not depend on its outer loop";
1635 S
->getFunction().getContext().diagnose(R
);
1639 DEBUG(dbgs() << "Analyzing target access " << MA
<< "\n");
1640 if (collapseScalarsToStore(MA
))
1646 DeLICMScopsModified
++;
1649 /// Dump the internal information about a performed DeLICM to @p OS.
1650 void print(llvm::raw_ostream
&OS
, int Indent
= 0) {
1651 if (!Zone
.isUsable()) {
1652 OS
<< "Zone not computed\n";
1656 printAccesses(OS
, Indent
);
1660 class DeLICM
: public ScopPass
{
1662 DeLICM(const DeLICM
&) = delete;
1663 const DeLICM
&operator=(const DeLICM
&) = delete;
1665 /// The pass implementation, also holding per-scop data.
1666 std::unique_ptr
<DeLICMImpl
> Impl
;
1668 void collapseToUnused(Scop
&S
) {
1669 Impl
= make_unique
<DeLICMImpl
>(&S
);
1671 if (!Impl
->computeZone()) {
1672 DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n");
1676 DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n");
1677 Impl
->greedyCollapse();
1679 DEBUG(dbgs() << "\nFinal Scop:\n");
1680 DEBUG(S
.print(dbgs()));
1685 explicit DeLICM() : ScopPass(ID
) {}
1687 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1688 AU
.addRequiredTransitive
<ScopInfoRegionPass
>();
1689 AU
.setPreservesAll();
1692 virtual bool runOnScop(Scop
&S
) override
{
1693 // Free resources for previous scop's computation, if not yet done.
1696 collapseToUnused(S
);
1701 virtual void printScop(raw_ostream
&OS
, Scop
&S
) const override
{
1704 assert(Impl
->getScop() == &S
);
1706 OS
<< "DeLICM result:\n";
1710 virtual void releaseMemory() override
{ Impl
.reset(); }
1714 } // anonymous namespace
1716 Pass
*polly::createDeLICMPass() { return new DeLICM(); }
1718 INITIALIZE_PASS_BEGIN(DeLICM
, "polly-delicm", "Polly - DeLICM/DePRE", false,
1720 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass
)
1721 INITIALIZE_PASS_END(DeLICM
, "polly-delicm", "Polly - DeLICM/DePRE", false,
1724 bool polly::isConflicting(IslPtr
<isl_union_set
> ExistingOccupied
,
1725 IslPtr
<isl_union_set
> ExistingUnused
,
1726 IslPtr
<isl_union_set
> ExistingWrites
,
1727 IslPtr
<isl_union_set
> ProposedOccupied
,
1728 IslPtr
<isl_union_set
> ProposedUnused
,
1729 IslPtr
<isl_union_set
> ProposedWrites
,
1730 llvm::raw_ostream
*OS
, unsigned Indent
) {
1731 Knowledge
Existing(std::move(ExistingOccupied
), std::move(ExistingUnused
),
1732 std::move(ExistingWrites
));
1733 Knowledge
Proposed(std::move(ProposedOccupied
), std::move(ProposedUnused
),
1734 std::move(ProposedWrites
));
1736 return Knowledge::isConflicting(Existing
, Proposed
, OS
, Indent
);