//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-scops"
STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");
bool polly::ModelReadOnlyScalars;
// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));
static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " less runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "be considered for run-time alias checks."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));
static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations are,
// in contrast, fairly stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
/// Helper to treat non-affine regions and basic blocks the same.
///
/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u = 0; u < NextIterationMap.dim(isl::dim::in); u++)
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}
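
// For example, for a two-dimensional SetSpace and Dim = 0 the map built above
// is { [i0, i1] -> [i0 + 1, i1] }: dimension 0 is incremented by one while all
// remaining dimensions are equated between input and output.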
/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}
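
// As an illustration, for S = { [i] : 0 <= i <= 7 or i >= N } only the first
// disjunct is bounded, so the collected result is { [i] : 0 <= i <= 7 }.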
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u = 0, e = S.n_dim(); u < e; u++)
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = S.n_dim();
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}
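
// For example, partitioning S = { [i] : i >= 0 } at Dim = 0 returns S itself
// as the unbounded part and the empty set as the bounded part, whereas
// S = { [i] : 0 <= i <= N } is returned entirely as the bounded part.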
/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
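
// For example, with Pred = ICMP_SLT, L = { [i] -> [(i)] } and
// R = [N] -> { [i] -> [(N)] } the resulting condition set is
// [N] -> { [i] : i < N }.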
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    int Diff = OldDepth - NewDepth;
    int NumDim = Dom.n_dim();
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}
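
// For example, when propagating a domain { [i] } from a block before a loop
// (depth 0) into the loop's header (depth 1), case 2 applies and a fresh
// dimension is appended, giving { [i, j] }; leaving the loop again triggers
// case 3, which projects the innermost dimension back out.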
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// MSB (aka the Sign bit) is set for an operand of an unsigned ICmpInst
/// the comparison wraps around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
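
// For example, the unsigned test "x <u N" is modeled as the conjunction
// 0 <= x and x < N: the non-negativity assumption is taken only for the upper
// bound N, since the test value x may legitimately have its sign bit set.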
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), LI, DT));

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
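
// Note how a branch condition such as "a < b && c < d" is handled above: the
// condition sets of both operands are built recursively, and the consequence
// set of the conjunction is their intersection (for "||" it is their union);
// everything else in the domain becomes the alternative set.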
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}
void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(Domain && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done.
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
                    PredBBInRegion))
      continue;

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}
bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition = nullptr;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
  // the bounded assumptions to the context as they are already implied by the
  // <nsw> tag.
  if (scop->hasNSWAddRecForLoop(L))
    return true;

  isl::set UnboundedCtx = Parts.first.params();
  scop->recordAssumption(INFINITELOOP, UnboundedCtx,
                         HeaderBB->getTerminator()->getDebugLoc(),
                         AS_RESTRICTION);
  return true;
}
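
// For example, for a loop "for (i = 0; i < N; i++)" the backedge is taken
// exactly by the iterations whose successor iteration still exists, so the
// header domain is narrowed to (roughly) { [i] : 0 <= i < N }; the parameter
// values for which no such bound exists end up in the unbounded part and are
// recorded as an INFINITELOOP restriction.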
void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::noexceptions::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}
bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), LI, DT))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(isl_set_n_dim(Domain.get()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (SuccDomain) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (SuccDomain.n_basic_set() < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(Domain && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      scop->recordAssumption(ERRORBLOCK, DomPar,
                             BB->getTerminator()->getDebugLoc(),
                             AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (SuccInvalidDomain.n_basic_set() < MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}
// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return Prev.sequence(Succ);
}
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//            Domain: { A[i,j]; B[i,j,k] }
//                 N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    int Dim = S.dim(isl::dim::set);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first-processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all nodes within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    ++LoopData;
    --Dimension;

    if (Schedule) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);
      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }

  // Now pop all loops processed up there from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the existence
  // of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                        [<num> x %struct.descriptor_dimension] }
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type.
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}
*ScopBuilder::findFADAllocationVisible(MemAccInst Inst
) {
1386 // match: 4.1 & 4.2 store/load
1387 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
1391 if (Inst
.getAlignment() != 8)
1394 Value
*Address
= Inst
.getPointerOperand();
1396 const BitCastInst
*Bitcast
= nullptr;
1398 if (auto *Slot
= dyn_cast
<GetElementPtrInst
>(Address
)) {
1399 Value
*TypedMem
= Slot
->getPointerOperand();
1401 Bitcast
= dyn_cast
<BitCastInst
>(TypedMem
);
1404 Bitcast
= dyn_cast
<BitCastInst
>(Address
);
1410 auto *MallocMem
= Bitcast
->getOperand(0);
1413 auto *MallocCall
= dyn_cast
<CallInst
>(MallocMem
);
1417 Function
*MallocFn
= MallocCall
->getCalledFunction();
1418 if (!(MallocFn
&& MallocFn
->hasName() && MallocFn
->getName() == "malloc"))
1421 // Find all uses the malloc'd memory.
1422 // We are looking for a "store" into a struct with the type being the Fortran
1424 for (auto user
: MallocMem
->users()) {
1426 auto *MallocStore
= dyn_cast
<StoreInst
>(user
);
1430 auto *DescriptorGEP
=
1431 dyn_cast
<GEPOperator
>(MallocStore
->getPointerOperand());
1436 auto DescriptorType
=
1437 dyn_cast
<StructType
>(DescriptorGEP
->getSourceElementType());
1438 if (!(DescriptorType
&& DescriptorType
->hasName()))
1441 Value
*Descriptor
= dyn_cast
<Value
>(DescriptorGEP
->getPointerOperand());
1446 if (!isFortranArrayDescriptor(Descriptor
))
1455 Value
*ScopBuilder::findFADAllocationInvisible(MemAccInst Inst
) {
1457 if (!isa
<LoadInst
>(Inst
) && !isa
<StoreInst
>(Inst
))
1460 Value
*Slot
= Inst
.getPointerOperand();
1462 LoadInst
*MemLoad
= nullptr;
1464 if (auto *SlotGEP
= dyn_cast
<GetElementPtrInst
>(Slot
)) {
1466 MemLoad
= dyn_cast
<LoadInst
>(SlotGEP
->getPointerOperand());
1469 MemLoad
= dyn_cast
<LoadInst
>(Slot
);
1475 auto *BitcastOperator
=
1476 dyn_cast
<BitCastOperator
>(MemLoad
->getPointerOperand());
1477 if (!BitcastOperator
)
1480 Value
*Descriptor
= dyn_cast
<Value
>(BitcastOperator
->getOperand(0));
1484 if (!isFortranArrayDescriptor(Descriptor
))
void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(scop->recorded_assumptions())) {
    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
  }
  scop->clearRecordedAssumptions();
}
void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length value is actually affine or if we overapproximate it.
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //       isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
    LLVM_FALLTHROUGH;
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}
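
// Note: the handlers above are tried from most to least specialized; the
// first one that successfully models Inst wins, and the single-dimensional
// construction serves as the catch-all fallback.
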
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}
/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main of all statements for @p BB. If
///               true, no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}
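
// For illustration (the exact spelling is up to getIslCompatibleName): with
// instruction names enabled, a block named "bb5" could yield "Stmt_bb5" for
// the main statement, "Stmt_bb5_b" for its second statement, and
// "Stmt_bb5_last" for the epilogue statement.
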
/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count += 1;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}
/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction, such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}
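
// For example, loads, stores, and most calls are ordered because they read
// or write memory or have side effects; pure arithmetic such as 'add' or
// 'icmp' is unordered and may be regrouped across statements freely.
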
/// Join instructions to the same statement if one uses the scalar result of
/// the other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}
/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging to
/// a different statement (which might as well contain ordered instructions, but
/// this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    // Since previous iterations might have merged sets, some items in
    // SeenLeaders are not leaders anymore. However, the new leader of
    // previously merged instructions must be one of the former leaders of
    // these merged instructions.
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // We are backtracking from the last element until we see Inst's leader
      // in SeenLeaders and merge all into one set. Although leaders of
      // instructions change during the execution of this loop, it's irrelevant
      // as we are just searching for the element that we already confirmed is
      // in the list.
      if (Prev == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
/// are in order, we put both into the statement. PHI WRITEs are always executed
/// after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could also
/// be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}
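
// For example (illustrative): in a self-looping block containing
//   %phi  = phi i32 [ %init, %entry ], [ %next, %self ]
//   %next = add i32 %phi, 1
// uniting %phi and %next keeps the PHI READ and the PHI WRITE of the
// back-edge value in one statement, so the write cannot overtake the read.
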
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr, *MainLeader = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the bb has the
    // same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions for statement (statement represented by the
  // leader instruction).
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // The order of statements must be preserved w.r.t. their ordered
  // instructions. Without this explicit scan, we would also use non-ordered
  // instructions (whose order is arbitrary) to determine statement order.
  for (Instruction &Inst : *BB) {
    if (!isOrderedInstruction(&Inst))
      continue;

    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    // Insert element for the leader instruction.
    (void)LeaderToInstList[*LeaderIt];
  }

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction &Inst : *BB) {
    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    if (&Inst == MainInst)
      MainLeader = *LeaderIt;
    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(&Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  for (auto &Instructions : LeaderToInstList) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);

    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue becomes the main statement only if there is no other
  // statement that could become main.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}
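
// For example (illustrative): in a block computing
//   %v = load %A ; %s = fadd %v, 1.0 ; store %s, %A ; store %u, %B
// the first three instructions form one statement (joined through the
// operand tree of the store to %A), the store to %B forms a second one,
// and an instruction-less epilogue statement is appended for PHI writes.
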
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and terminators that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}
/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
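
// Worked example: for Expr = 4*i + 8 and Size = 4, the n-ary rule requires
// both operands to be divisible. 4*i succeeds via the SCEVMulExpr rule
// (factor 4 is divisible), and the constant 8 succeeds in the final check
// because (8 /u 4) * 4 folds back to 8.
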
void ScopBuilder::foldSizeConstantsToRight() {
  isl::union_set Accessed = scop->getAccesses().range();

  for (auto Array : scop->arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl::space Space = Array->getSpace();
    Space = Space.align_params(Accessed.get_space());

    if (!Accessed.contains(Space))
      continue;

    isl::set Elements = Accessed.extract_set(Space);
    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());

    std::vector<int> Int;
    int Dims = Elements.dim(isl::dim::set);
    for (int i = 0; i < Dims; i++) {
      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);

      isl::basic_set DimHull = DimOnly.affine_hull();

      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
        continue;
      }

      if (DimHull.dim(isl::dim::div) == 1) {
        isl::aff Diff = DimHull.get_div(0);
        isl::val Val = Diff.get_denominator_val();

        int ValInt = 1;
        if (Val.is_int()) {
          auto ValAPInt = APIntFromVal(Val);
          if (ValAPInt.isSignedIntN(32))
            ValInt = ValAPInt.getSExtValue();
        }

        Int.push_back(ValInt);
        isl::constraint C = isl::constraint::alloc_equality(
            isl::local_space(Transform.get_space()));
        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
        C = C.set_coefficient_si(isl::dim::in, i, -1);
        Transform = Transform.add_constraint(C);
        continue;
      }

      isl::basic_set ZeroSet = isl::basic_set(DimHull);
      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);

      int ValInt = 1;
      if (ZeroSet.is_equal(DimHull)) {
        ValInt = 0;
      }

      Int.push_back(ValInt);
      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
    }

    isl::set MappedElements = isl::map(Transform).domain();
    if (!Elements.is_subset(MappedElements))
      continue;

    bool CanFold = true;
    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold)
      continue;

    for (auto &Access : scop->access_functions())
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(
            Access->getAccessRelation().apply_range(Transform));

    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);
  }
}
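
// For example (illustrative): if an array A[n][m] is only ever accessed at
// subscripts of the form [3*i][j], the transform above rewrites the access
// to [i][j] and compensates by growing the innermost dimension, yielding
// the folded sizes A[n][3*m].
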
void ScopBuilder::markFortranArrays() {
  for (ScopStmt &Stmt : *scop) {
    for (MemoryAccess *MemAcc : Stmt) {
      Value *FAD = MemAcc->getFortranArrayDescriptor();
      if (!FAD)
        continue;

      // TODO: const_cast-ing to edit
      ScopArrayInfo *SAI =
          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
      assert(SAI && "memory access into a Fortran array does not "
                    "have an associated ScopArrayInfo");
      SAI->applyAndSetFAD(FAD);
    }
  }
}
void ScopBuilder::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
  markFortranArrays();
}
void ScopBuilder::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *scop)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
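
// For example (illustrative): an array declared with i64 elements (8 bytes)
// that is also accessed at byte offsets divisible only by 4 ends up with
// DivisibleSize == 4 and is given the virtual element type i32.
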
void ScopBuilder::foldAccessRelations() {
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}

void ScopBuilder::assumeNoOutOfBounds() {
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->assumeNoOutOfBound();
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}
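
// For example (illustrative): if %sum is defined in statement Stmt_body and
// read in Stmt_exit, this adds a MemoryKind::Value MUST_WRITE of %sum to
// Stmt_body so the scalar can be communicated between the two statements.
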
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one
    // already exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining
    // statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from subregion each can be the effective written value of the
  // subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc->getAccessInstruction() == PHI);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType
getReductionType(const BinaryOperator *BinOp, const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
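
// For example, the update chain of 'sum += A[i]' lowers to a load, an 'add'
// (or a fast-math 'fadd'), and a store; the add maps to RT_ADD here, while
// a non-fast 'fadd' is rejected because reordering it would change
// floating-point results.
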
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // Then check if the remaining accesses overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
void ScopBuilder::verifyInvariantLoads() {
  auto &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && scop->contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : *scop)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}
void ScopBuilder::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = scop->getWrites();
  for (ScopStmt &Stmt : *scop) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt)
      if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
        InvariantAccesses.push_back({Access, NHCtx});

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}
/// Check if an access range is too complex.
///
/// An access range is too complex, if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we assume a set to
/// be too complex if the sum of existentially quantified dimensions and
/// set dimensions is larger than a threshold. This reliably detects both
/// sets with many disjuncts as well as sets with many divisions as they
/// arise.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
static bool isAccessRangeTooComplex(isl::set AccessRange) {
  int NumTotalDims = 0;

  for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
    NumTotalDims += BSet.dim(isl::dim::div);
    NumTotalDims += BSet.dim(isl::dim::set);
  }

  if (NumTotalDims > MaxDimensionsInAccessRange)
    return true;

  return false;
}
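
// For example, a range with three disjuncts, each having two set dimensions
// and two existentially quantified dimensions, counts 3 * (2 + 2) = 12
// total dimensions and exceeds MaxDimensionsInAccessRange.
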
bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
                                               isl::union_map Writes) {
  if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
    return getNonHoistableCtx(BasePtrMA, Writes).is_null();
  }

  Value *BaseAddr = MA->getOriginalBaseAddr();
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
    if (!isa<LoadInst>(BasePtrInst))
      return scop->contains(BasePtrInst);

  return false;
}
void ScopBuilder::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
  isl::space Space = scop->getParamSpace();
  if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
    std::string SpaceStr = Space.to_str();
    errs() << "Error: the context provided in -polly-context does not have "
           << "the same number of dimensions as the computed context. Due to "
           << "this mismatch, the -polly-context option is ignored. Please "
           << "provide the context in the parameter space: " << SpaceStr
           << ".\n";
    return;
  }

  for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
    std::string NameContext =
        scop->getContext().get_dim_name(isl::dim::param, i);
    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);

    if (NameContext != NameUserContext) {
      std::string SpaceStr = Space.to_str();
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      return;
    }

    UserContext = UserContext.set_dim_id(isl::dim::param, i,
                                         Space.get_dim_id(isl::dim::param, i));
  }

  isl::set newContext = scop->getContext().intersect(UserContext);
  scop->setContext(newContext);
}
isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
                                         isl::union_map Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our code
  //       generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return nullptr;

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen, e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return nullptr;

  isl::map AccessRelation = Access->getAccessRelation();
  assert(!AccessRelation.is_empty());

  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
    return nullptr;

  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  isl::set SafeToLoad;

  auto &DL = scop->getFunction().getParent()->getDataLayout();
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
                                  MaybeAlign(LI->getAlignment()), DL)) {
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    return nullptr;
  } else {
    SafeToLoad = AccessRelation.range();
  }

  if (isAccessRangeTooComplex(AccessRelation.range()))
    return nullptr;

  isl::union_map Written = Writes.intersect_range(SafeToLoad);
  isl::set WrittenCtx = Written.params();
  bool IsWritten = !WrittenCtx.is_empty();

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = WrittenCtx.remove_divs();
  bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI))
    return nullptr;

  scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
                      AS_RESTRICTION, LI->getParent());

  return WrittenCtx;
}
static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  for (const llvm::Argument &Arg : F.args())
    if (&Arg == maybeParam)
      return true;

  return false;
}
*MA
,
2932 bool StmtInvalidCtxIsEmpty
,
2933 bool MAInvalidCtxIsEmpty
,
2934 bool NonHoistableCtxIsEmpty
) {
2935 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
2936 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
2937 if (PollyAllowDereferenceOfAllFunctionParams
&&
2938 isAParameter(LInst
->getPointerOperand(), scop
->getFunction()))
2941 // TODO: We can provide more information for better but more expensive
2943 if (!isDereferenceableAndAlignedPointer(
2944 LInst
->getPointerOperand(), LInst
->getType(),
2945 MaybeAlign(LInst
->getAlignment()), DL
))
2948 // If the location might be overwritten we do not hoist it unconditionally.
2950 // TODO: This is probably too conservative.
2951 if (!NonHoistableCtxIsEmpty
)
2954 // If a dereferenceable load is in a statement that is modeled precisely we
2956 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
2959 // Even if the statement is not modeled precisely we can hoist the load if it
2960 // does not involve any parameters that might have been specialized by the
2961 // statement domain.
2962 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
2963 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
                                    InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement. Otherwise
  // we could have cyclic dependences on the constraints under which the
  // hoisted loads are executed and we could not determine an order in which to
  // pre-load them. This happens because not only lower bounds are part of the
  // domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE.isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : scop->parameters()) {
        Values.clear();
        findValues(Parameter, SE, Values);
        if (!Values.count(AccInst))
          continue;

        if (isl::id ParamId = scop->getIdForParam(Parameter)) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(scop->getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : scop->invariantEquivClasses()) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct part of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (IAClassDomainCtx)
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    MACtx = MACtx.coalesce();

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}
void ScopBuilder::collectCandidateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}
/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// scop array.
static const ScopArrayInfo *findCanonicalArray(Scop &S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}
3149 static bool isUsedForIndirectHoistedLoad(Scop
&S
, const ScopArrayInfo
*Array
) {
3150 for (InvariantEquivClassTy
&EqClass2
: S
.getInvariantAccesses())
3151 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
3152 if (Access2
->getScopArrayInfo() == Array
)
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}
void ScopBuilder::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(*scop, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
        continue;

      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}
/// Add the minimal/maximal access in @p Set to @p User.
///
/// @return True if more accesses should be added, false if we reached the
///         maximal number of run-time checks to be generated.
static bool buildMinMaxAccess(isl::set Set,
                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;

  Set = Set.remove_divs();
  polly::simplify(Set);

  if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
    Set = Set.simple_hull();

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return false;
  }

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
         "Assumed at least one output dimension");

  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (!MinPMA || !MaxPMA)
    return false;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return true;
}
3294 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup
,
3295 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
3296 MinMaxAccesses
.reserve(AliasGroup
.size());
3298 isl::union_set Domains
= scop
->getDomains();
3299 isl::union_map Accesses
= isl::union_map::empty(scop
->getParamSpace());
3301 for (MemoryAccess
*MA
: AliasGroup
)
3302 Accesses
= Accesses
.add_map(MA
->getAccessRelation());
3304 Accesses
= Accesses
.intersect_domain(Domains
);
3305 isl::union_set Locations
= Accesses
.range();
3307 bool LimitReached
= false;
3308 for (isl::set Set
: Locations
.get_set_list()) {
3309 LimitReached
|= !buildMinMaxAccess(Set
, MinMaxAccesses
, *scop
);
3314 return !LimitReached
;
static isl::set getAccessDomain(MemoryAccess *MA) {
  isl::set Domain = MA->getStatement()->getDomain();
  Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
  return Domain.reset_tuple_id();
}
bool ScopBuilder::buildAliasChecks() {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want to
    // collect statistics so we do it here explicitly.
    if (scop->getAliasGroups().size())
      Scop::incrementNumberOfAliasingAssumptions(1);
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  scop->invalidate(ALIASING, DebugLoc());

  LLVM_DEBUG(
      dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
             << " could not be created as the number of parameters involved "
                "is too high. The SCoP will be "
                "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                "the maximal number of parameters but be advised that the "
                "compile time might increase exponentially.\n\n");
  return false;
}
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *scop) {
    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
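// Illustrative example (hypothetical IR values): if %A and %B may alias while
// %C only participates in a must-alias set, the tracker produces the
// may-alias set {%A, %B}, which the loop above turns into the alias group
// {MA(%A), MA(%B)}. Must-alias sets are skipped because their pointers
// provably refer to the same location and need no runtime disambiguation.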
bool ScopBuilder::buildAliasGroups() {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read-only and non-read-only accesses.
  //   o) For each group with more than one base pointer we then compute
  //      minimal and maximal accesses to each array of the group, for the
  //      read-only and non-read-only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!scop->hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }

    // If the isl operation quota was exceeded while building this group, the
    // SCoP is too complex to handle; invalidate it.
    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
      scop->invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}
bool ScopBuilder::buildAliasGroup(
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }
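  // Illustrative example (hypothetical arrays): for a group over {A, B, C}
  // where only A is written, the partitions become ReadWrite = {A} and
  // ReadOnly = {B, C}. Code generation then only has to compare A against B
  // and A against C; no B vs. C check is needed since reads never conflict.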
  // If there are no read-only pointers, and less than two read-write
  // pointers, no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                       MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
      scop->addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;

  bool Valid;

  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
  if (!Valid)
    return false;

  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);

  return true;
}
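// Cost sketch (assuming code generation compares every pair of bounding
// boxes): with n read-write and m read-only min/max pairs, roughly
// n * (n - 1) / 2 + n * m disjointness tests are emitted, hence the quadratic
// growth that RunTimeChecksMaxArraysPerGroup guards against.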
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl::set AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl::set MADomain = getAccessDomain(MA);
      if (AGDomain.is_disjoint(MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
      } else {
        AGDomain = AGDomain.unite(MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
  }
}
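// For intuition (illustrative): two accesses guarded by mutually exclusive
// conditions, e.g. with domains { [] : N > 0 } and { [] : N <= 0 }, can never
// execute together, so they end up in separate alias groups and no runtime
// check is generated between them.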
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}
/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer corresponding
/// to the "physical" ones, hence called "virtual"). The CodeGenerator
/// therefore has to pick up the virtual uses. But here in the ScopBuilder,
/// that has not happened yet, such that virtual and physical uses are
/// equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
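// For intuition (illustrative): if %v is defined in one statement and used in
// another, both the "physical" classification derived from the IR and the
// "virtual" one derived from the MemoryAccesses must see an inter-statement
// read of %v; any mismatch points to a missing or stray MemoryAccess.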
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));

  buildStmts(R);
  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some
      // statement. It is not important to which statement the MemoryAccess is
      // added because it will later be removed from the ScopStmt again. We
      // choose the first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();
  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be afterwards.
  // To handle these PHI nodes later we will now model their operands as
  // scalar accesses. Note that we do not model anything in the exit block if
  // we have an exiting block in the region, as there will not be any
  // splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }
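  // Sketch of the situation handled above (hypothetical CFG): the region's
  // exit block starts with "%phi = phi [%a, %inside], [%b, %elsewhere]".
  // After exit-block splitting during code generation %phi moves into the
  // region, so its incoming values from inside the region must already be
  // modeled as scalar (exit-PHI) accesses here.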
  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead),
                     MemoryAccess::READ, BP, BP->getType(), false, {AF},
                     {nullptr}, GlobalRead);
  }
  buildInvariantEquivalenceClasses();
  // A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!buildDomains(&R, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  addUserAssumptions(AC, InvalidDomainMap);
  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);
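  // Background (summary): a statement's invalid domain collects the parameter
  // and iteration combinations under which the assumptions taken while
  // building its domain (e.g. absence of integer overflow) do not hold; those
  // executions are excluded via the assumed/invalid context later on.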
  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }
  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }
  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }
  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }
  buildSchedule();

  finalizeAccesses();

  scop->realignParams();
  addUserContext();

  // Now that the context is fully constructed, i.e. all our knowledge about
  // the parameters is in it, add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();
  scop->simplifyContexts();
  if (!buildAliasChecks()) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because we could not build alias checks\n");
    return;
  }
  hoistInvariantLoads();
  canonicalizeDynamicBasePtrs();
  verifyInvariantLoads();
  scop->simplifySCoP(true);
  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}