//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
20 #include "polly/ScopInfo.h"
21 #include "polly/LinkAllPasses.h"
22 #include "polly/Options.h"
23 #include "polly/ScopBuilder.h"
24 #include "polly/ScopDetection.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/ISLOStream.h"
27 #include "polly/Support/ISLTools.h"
28 #include "polly/Support/SCEVAffinator.h"
29 #include "polly/Support/SCEVValidator.h"
30 #include "polly/Support/ScopHelper.h"
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/DenseSet.h"
35 #include "llvm/ADT/PostOrderIterator.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
42 #include "llvm/ADT/StringExtras.h"
43 #include "llvm/ADT/StringMap.h"
44 #include "llvm/Analysis/AliasAnalysis.h"
45 #include "llvm/Analysis/AliasSetTracker.h"
46 #include "llvm/Analysis/AssumptionCache.h"
47 #include "llvm/Analysis/Loads.h"
48 #include "llvm/Analysis/LoopInfo.h"
49 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
50 #include "llvm/Analysis/RegionInfo.h"
51 #include "llvm/Analysis/RegionIterator.h"
52 #include "llvm/Analysis/ScalarEvolution.h"
53 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
54 #include "llvm/IR/Argument.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/CFG.h"
57 #include "llvm/IR/ConstantRange.h"
58 #include "llvm/IR/Constants.h"
59 #include "llvm/IR/DataLayout.h"
60 #include "llvm/IR/DebugLoc.h"
61 #include "llvm/IR/DerivedTypes.h"
62 #include "llvm/IR/DiagnosticInfo.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/InstrTypes.h"
66 #include "llvm/IR/Instruction.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/IntrinsicInst.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/PassManager.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/User.h"
74 #include "llvm/IR/Value.h"
75 #include "llvm/Pass.h"
76 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/ErrorHandling.h"
81 #include "llvm/Support/MathExtras.h"
82 #include "llvm/Support/raw_ostream.h"
84 #include "isl/constraint.h"
85 #include "isl/local_space.h"
87 #include "isl/options.h"
88 #include "isl/printer.h"
89 #include "isl/schedule.h"
90 #include "isl/schedule_node.h"
92 #include "isl/union_map.h"
93 #include "isl/union_set.h"
107 using namespace llvm
;
108 using namespace polly
;
110 #define DEBUG_TYPE "polly-scops"
112 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
113 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
114 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
115 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
116 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
117 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
118 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
119 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
120 STATISTIC(AssumptionsInvariantLoad
,
121 "Number of invariant loads assumptions taken.");
122 STATISTIC(AssumptionsDelinearization
,
123 "Number of delinearization assumptions taken.");
125 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
126 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
127 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
128 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
130 STATISTIC(NumScopsDepthZero
, "Number of scops with maximal loop depth 0");
131 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
132 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
133 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
134 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
135 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
136 STATISTIC(NumScopsDepthLarger
,
137 "Number of scops with maximal loop depth 6 and larger");
138 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
140 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
142 NumValueWritesInLoops
,
143 "Number of scalar value writes nested in affine loops after ScopInfo");
144 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
145 STATISTIC(NumPHIWritesInLoops
,
146 "Number of scalar phi writes nested in affine loops after ScopInfo");
147 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
148 STATISTIC(NumSingletonWritesInLoops
,
149 "Number of singleton writes nested in affine loops after ScopInfo");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;
168 OptComputeOut("polly-analysis-computeout",
169 cl::desc("Bound the scop analysis by a maximal amount of "
170 "computational steps (0 means no bound)"),
171 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
172 cl::cat(PollyCategory
));
174 static cl::opt
<bool> PollyRemarksMinimal(
175 "polly-remarks-minimal",
176 cl::desc("Do not emit remarks about assumptions that are known"),
177 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
179 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
180 "polly-rtc-max-array-disjuncts",
181 cl::desc("The maximal number of disjunts allowed in memory accesses to "
183 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
185 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
186 "polly-rtc-max-parameters",
187 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
188 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
190 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
191 "polly-rtc-max-arrays-per-group",
192 cl::desc("The maximal number of arrays to compare in each alias group."),
193 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
195 static cl::opt
<std::string
> UserContextStr(
196 "polly-context", cl::value_desc("isl parameter set"),
197 cl::desc("Provide additional constraints on the context parameters"),
198 cl::init(""), cl::cat(PollyCategory
));
201 IslOnErrorAbort("polly-on-isl-error-abort",
202 cl::desc("Abort if an isl error is encountered"),
203 cl::init(true), cl::cat(PollyCategory
));
205 static cl::opt
<bool> PollyPreciseInbounds(
206 "polly-precise-inbounds",
207 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
208 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
211 PollyIgnoreInbounds("polly-ignore-inbounds",
212 cl::desc("Do not take inbounds assumptions at all"),
213 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
215 static cl::opt
<bool> PollyIgnoreParamBounds(
216 "polly-ignore-parameter-bounds",
218 "Do not add parameter bounds and do no gist simplify sets accordingly"),
219 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
221 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
222 "polly-allow-dereference-of-all-function-parameters",
224 "Treat all parameters to functions that are pointers as dereferencible."
225 " This is useful for invariant load hoisting, since we can generate"
226 " less runtime checks. This is only valid if all pointers to functions"
227 " are always initialized, so that Polly can choose to hoist"
229 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
231 static cl::opt
<bool> PollyPreciseFoldAccesses(
232 "polly-precise-fold-accesses",
233 cl::desc("Fold memory accesses to model more possible delinearizations "
234 "(does not scale well)"),
235 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
237 bool polly::UseInstructionNames
;
239 static cl::opt
<bool, true> XUseInstructionNames(
240 "polly-use-llvm-names",
241 cl::desc("Use LLVM-IR names when deriving statement names"),
242 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
243 cl::ZeroOrMore
, cl::cat(PollyCategory
));
245 static cl::opt
<bool> PollyPrintInstructions(
246 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
247 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
249 //===----------------------------------------------------------------------===//
251 // Create a sequence of two schedules. Either argument may be null and is
252 // interpreted as the empty schedule. Can also return null if both schedules are
254 static isl::schedule
combineInSequence(isl::schedule Prev
, isl::schedule Succ
) {
260 return Prev
.sequence(Succ
);
263 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
264 int dim
, isl::dim type
) {
266 isl::ctx Ctx
= S
.get_ctx();
268 // The upper and lower bound for a parameter value is derived either from
269 // the data type of the parameter or from the - possibly more restrictive -
271 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
272 S
= S
.lower_bound_val(type
, dim
, V
);
273 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
274 S
= S
.upper_bound_val(type
, dim
, V
);
276 if (Range
.isFullSet())
279 if (S
.n_basic_set() > MaxDisjunctsInContext
)
282 // In case of signed wrapping, we can refine the set of valid values by
283 // excluding the part not covered by the wrapping range.
284 if (Range
.isSignWrappedSet()) {
285 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
286 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
288 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
290 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
297 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
298 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
302 if (!S
->contains(BasePtrLI
))
305 ScalarEvolution
&SE
= *S
->getSE();
307 auto *OriginBaseSCEV
=
308 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
312 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
313 if (!OriginBaseSCEVUnknown
)
316 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
320 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
321 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
322 const DataLayout
&DL
, Scop
*S
,
323 const char *BaseName
)
324 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
325 std::string BasePtrName
=
327 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
328 Kind
== MemoryKind::PHI
? "__phi" : "",
329 UseInstructionNames
);
330 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
334 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
335 BasePtrOriginSAI
= nullptr;
339 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
340 if (BasePtrOriginSAI
)
341 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
344 ScopArrayInfo::~ScopArrayInfo() = default;
346 isl::space
ScopArrayInfo::getSpace() const {
347 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
348 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
352 bool ScopArrayInfo::isReadOnly() {
353 isl::union_set WriteSet
= S
.getWrites().range();
354 isl::space Space
= getSpace();
355 WriteSet
= WriteSet
.extract_set(Space
);
357 return bool(WriteSet
.is_empty());
360 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
361 if (Array
->getElementType() != getElementType())
364 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
367 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
368 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
374 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
375 if (NewElementType
== ElementType
)
378 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
379 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
381 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
384 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
385 ElementType
= NewElementType
;
387 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
388 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
392 /// Make the ScopArrayInfo model a Fortran Array
393 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
394 assert(FAD
&& "got invalid Fortran array descriptor");
396 assert(this->FAD
== FAD
&&
397 "receiving different array descriptors for same array");
401 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
405 isl::space
Space(S
.getIslCtx(), 1, 0);
407 std::string param_name
= getName();
408 param_name
+= "_fortranarr_size";
409 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
411 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
413 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
415 DimensionSizesPw
[0] = PwAff
;
418 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
419 bool CheckConsistency
) {
420 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
421 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
422 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
424 if (CheckConsistency
) {
425 for (int i
= 0; i
< SharedDims
; i
++) {
426 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
427 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
428 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
432 if (DimensionSizes
.size() >= NewSizes
.size())
436 DimensionSizes
.clear();
437 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
439 DimensionSizesPw
.clear();
440 for (const SCEV
*Expr
: DimensionSizes
) {
442 DimensionSizesPw
.push_back(nullptr);
445 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
446 DimensionSizesPw
.push_back(Size
);
451 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
453 int ScopArrayInfo::getElemSizeInBytes() const {
454 return DL
.getTypeAllocSize(ElementType
);
457 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
459 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
460 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
463 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
464 OS
.indent(8) << *getElementType() << " " << getName();
466 // If this is a Fortran array, then we can print the outermost dimension
467 // as a isl_pw_aff even though there is no SCEV information.
468 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
470 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
471 !getDimensionSize(0)) {
475 for (; u
< getNumberOfDimensions(); u
++) {
479 isl::pw_aff Size
= getDimensionSizePw(u
);
480 OS
<< " " << Size
<< " ";
482 OS
<< *getDimensionSize(u
);
490 if (BasePtrOriginSAI
)
491 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
493 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
496 const ScopArrayInfo
*
497 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
498 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
499 assert(!Id
.is_null() && "Output dimension didn't have an ID");
500 return getFromId(Id
);
503 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
504 void *User
= Id
.get_user();
505 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
509 void MemoryAccess::wrapConstantDimensions() {
510 auto *SAI
= getScopArrayInfo();
511 isl::space ArraySpace
= SAI
->getSpace();
512 isl::ctx Ctx
= ArraySpace
.get_ctx();
513 unsigned DimsArray
= SAI
->getNumberOfDimensions();
515 isl::multi_aff DivModAff
= isl::multi_aff::identity(
516 ArraySpace
.map_from_domain_and_range(ArraySpace
));
517 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
519 // Begin with last dimension, to iteratively carry into higher dimensions.
520 for (int i
= DimsArray
- 1; i
> 0; i
--) {
521 auto *DimSize
= SAI
->getDimensionSize(i
);
522 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
524 // This transformation is not applicable to dimensions with dynamic size.
528 // This transformation is not applicable to dimensions of size zero.
529 if (DimSize
->isZero())
532 isl::val DimSizeVal
=
533 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
534 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
536 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
538 // Compute: index % size
539 // Modulo must apply in the divide of the previous iteration, if any.
540 isl::aff Modulo
= Var
.mod(DimSizeVal
);
541 Modulo
= Modulo
.pullback(DivModAff
);
543 // Compute: floor(index / size)
544 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
545 Divide
= Divide
.floor();
546 Divide
= Divide
.add(PrevVar
);
547 Divide
= Divide
.pullback(DivModAff
);
549 // Apply Modulo and Divide.
550 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
551 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
554 // Apply all modulo/divides on the accesses.
555 isl::map Relation
= AccessRelation
;
556 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
557 Relation
= Relation
.detect_equalities();
558 AccessRelation
= Relation
;
561 void MemoryAccess::updateDimensionality() {
562 auto *SAI
= getScopArrayInfo();
563 isl::space ArraySpace
= SAI
->getSpace();
564 isl::space AccessSpace
= AccessRelation
.get_space().range();
565 isl::ctx Ctx
= ArraySpace
.get_ctx();
567 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
568 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
569 auto DimsMissing
= DimsArray
- DimsAccess
;
571 auto *BB
= getStatement()->getEntryBlock();
572 auto &DL
= BB
->getModule()->getDataLayout();
573 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
574 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
576 isl::map Map
= isl::map::from_domain_and_range(
577 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
579 for (unsigned i
= 0; i
< DimsMissing
; i
++)
580 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
582 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
583 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
585 AccessRelation
= AccessRelation
.apply_range(Map
);
587 // For the non delinearized arrays, divide the access function of the last
588 // subscript by the size of the elements in the array.
590 // A stride one array access in C expressed as A[i] is expressed in
591 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
592 // two subsequent values of 'i' index two values that are stored next to
593 // each other in memory. By this division we make this characteristic
594 // obvious again. If the base pointer was accessed with offsets not divisible
595 // by the accesses element size, we will have chosen a smaller ArrayElemSize
596 // that divides the offsets of all accesses to this base pointer.
597 if (DimsAccess
== 1) {
598 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
599 AccessRelation
= AccessRelation
.floordiv_val(V
);
602 // We currently do this only if we added at least one dimension, which means
603 // some dimension's indices have not been specified, an indicator that some
604 // index values have been added together.
605 // TODO: Investigate general usefulness; Effect on unit tests is to make index
606 // expressions more complicated.
608 wrapConstantDimensions();
611 computeBoundsOnAccessRelation(ArrayElemSize
);
613 // Introduce multi-element accesses in case the type loaded by this memory
614 // access is larger than the canonical element type of the array.
616 // An access ((float *)A)[i] to an array char *A is modeled as
617 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
618 if (ElemBytes
> ArrayElemSize
) {
619 assert(ElemBytes
% ArrayElemSize
== 0 &&
620 "Loaded element size should be multiple of canonical element size");
621 isl::map Map
= isl::map::from_domain_and_range(
622 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
623 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
624 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
629 LS
= isl::local_space(Map
.get_space());
630 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
632 C
= isl::constraint::alloc_inequality(LS
);
633 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
634 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
635 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
636 Map
= Map
.add_constraint(C
);
638 C
= isl::constraint::alloc_inequality(LS
);
639 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
640 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
641 C
= C
.set_constant_val(isl::val(Ctx
, 0));
642 Map
= Map
.add_constraint(C
);
643 AccessRelation
= AccessRelation
.apply_range(Map
);
648 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
650 case MemoryAccess::RT_NONE
:
651 llvm_unreachable("Requested a reduction operator string for a memory "
652 "access which isn't a reduction");
653 case MemoryAccess::RT_ADD
:
655 case MemoryAccess::RT_MUL
:
657 case MemoryAccess::RT_BOR
:
659 case MemoryAccess::RT_BXOR
:
661 case MemoryAccess::RT_BAND
:
664 llvm_unreachable("Unknown reduction type");
667 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
668 isl::id ArrayId
= getArrayId();
669 void *User
= ArrayId
.get_user();
670 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
674 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
675 isl::id ArrayId
= getLatestArrayId();
676 void *User
= ArrayId
.get_user();
677 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
681 isl::id
MemoryAccess::getOriginalArrayId() const {
682 return AccessRelation
.get_tuple_id(isl::dim::out
);
685 isl::id
MemoryAccess::getLatestArrayId() const {
686 if (!hasNewAccessRelation())
687 return getOriginalArrayId();
688 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
691 isl::map
MemoryAccess::getAddressFunction() const {
692 return getAccessRelation().lexmin();
696 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
697 isl::map Schedule
, ScheduledAccRel
;
698 isl::union_set UDomain
;
700 UDomain
= getStatement()->getDomain();
701 USchedule
= USchedule
.intersect_domain(UDomain
);
702 Schedule
= isl::map::from_union_map(USchedule
);
703 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
704 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
707 isl::map
MemoryAccess::getOriginalAccessRelation() const {
708 return AccessRelation
;
711 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
712 return AccessRelation
.to_str();
715 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
716 return AccessRelation
.get_space();
719 isl::map
MemoryAccess::getNewAccessRelation() const {
720 return NewAccessRelation
;
723 std::string
MemoryAccess::getNewAccessRelationStr() const {
724 return NewAccessRelation
.to_str();
727 std::string
MemoryAccess::getAccessRelationStr() const {
728 return getAccessRelation().to_str();
731 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
732 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
733 Space
= Space
.align_params(Statement
->getDomainSpace());
735 return isl::basic_map::from_domain_and_range(
736 isl::basic_set::universe(Statement
->getDomainSpace()),
737 isl::basic_set::universe(Space
));
740 // Formalize no out-of-bound access assumption
742 // When delinearizing array accesses we optimistically assume that the
743 // delinearized accesses do not access out of bound locations (the subscript
744 // expression of each array evaluates for each statement instance that is
745 // executed to a value that is larger than zero and strictly smaller than the
746 // size of the corresponding dimension). The only exception is the outermost
747 // dimension for which we do not need to assume any upper bound. At this point
748 // we formalize this assumption to ensure that at code generation time the
749 // relevant run-time checks can be generated.
751 // To find the set of constraints necessary to avoid out of bound accesses, we
752 // first build the set of data locations that are not within array bounds. We
753 // then apply the reverse access relation to obtain the set of iterations that
754 // may contain invalid accesses and reduce this set of iterations to the ones
755 // that are actually executed by intersecting them with the domain of the
756 // statement. If we now project out all loop dimensions, we obtain a set of
757 // parameters that may cause statement instances to be executed that may
758 // possibly yield out of bound memory accesses. The complement of these
759 // constraints is the set of constraints that needs to be assumed to ensure such
760 // statement instances are never executed.
761 void MemoryAccess::assumeNoOutOfBound() {
762 if (PollyIgnoreInbounds
)
764 auto *SAI
= getScopArrayInfo();
765 isl::space Space
= getOriginalAccessRelationSpace().range();
766 isl::set Outside
= isl::set::empty(Space
);
767 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
768 isl::local_space
LS(Space
);
769 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
770 isl::pw_aff Zero
= isl::pw_aff(LS
);
772 isl::set DimOutside
= Var
.lt_set(Zero
);
773 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
774 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
775 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
776 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
778 Outside
= Outside
.unite(DimOutside
);
781 Outside
= Outside
.apply(getAccessRelation().reverse());
782 Outside
= Outside
.intersect(Statement
->getDomain());
783 Outside
= Outside
.params();
785 // Remove divs to avoid the construction of overly complicated assumptions.
786 // Doing so increases the set of parameter combinations that are assumed to
787 // not appear. This is always save, but may make the resulting run-time check
788 // bail out more often than strictly necessary.
789 Outside
= Outside
.remove_divs();
790 Outside
= Outside
.complement();
791 const auto &Loc
= getAccessInstruction()
792 ? getAccessInstruction()->getDebugLoc()
794 if (!PollyPreciseInbounds
)
795 Outside
= Outside
.gist_params(Statement
->getDomain().params());
796 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
, Loc
,
800 void MemoryAccess::buildMemIntrinsicAccessRelation() {
801 assert(isMemoryIntrinsic());
802 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
804 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
805 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
808 if (Subscripts
[1] == nullptr) {
809 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
811 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
812 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
813 isl::space RangeSpace
= LengthMap
.get_space().range();
814 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
816 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
817 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
818 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
819 LengthMap
= LengthMap
.sum(SubscriptMap
);
821 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
824 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
825 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
827 auto MAI
= MemAccInst(getAccessInstruction());
828 if (isa
<MemIntrinsic
>(MAI
))
831 Value
*Ptr
= MAI
.getPointerOperand();
832 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
835 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
836 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
839 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
840 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
841 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
843 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
844 if (Range
.isFullSet())
847 if (Range
.isWrappedSet() || Range
.isSignWrappedSet())
850 bool isWrapping
= Range
.isSignWrappedSet();
852 unsigned BW
= Range
.getBitWidth();
853 const auto One
= APInt(BW
, 1);
854 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
855 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
857 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
858 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
860 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
862 isl::map Relation
= AccessRelation
;
863 isl::set AccessRange
= Relation
.range();
864 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
866 AccessRelation
= Relation
.intersect_range(AccessRange
);
869 void MemoryAccess::foldAccessRelation() {
870 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
873 int Size
= Subscripts
.size();
875 isl::map NewAccessRelation
= AccessRelation
;
877 for (int i
= Size
- 2; i
>= 0; --i
) {
879 isl::map MapOne
, MapTwo
;
880 isl::pw_aff DimSize
= getPwAff(Sizes
[i
+ 1]);
882 isl::space SpaceSize
= DimSize
.get_space();
883 isl::id ParamId
= SpaceSize
.get_dim_id(isl::dim::param
, 0);
885 Space
= AccessRelation
.get_space();
886 Space
= Space
.range().map_from_set();
887 Space
= Space
.align_params(SpaceSize
);
889 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
891 MapOne
= isl::map::universe(Space
);
892 for (int j
= 0; j
< Size
; ++j
)
893 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
894 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
896 MapTwo
= isl::map::universe(Space
);
897 for (int j
= 0; j
< Size
; ++j
)
898 if (j
< i
|| j
> i
+ 1)
899 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
901 isl::local_space
LS(Space
);
903 C
= isl::constraint::alloc_equality(LS
);
904 C
= C
.set_constant_si(-1);
905 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
906 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
907 MapTwo
= MapTwo
.add_constraint(C
);
908 C
= isl::constraint::alloc_equality(LS
);
909 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
910 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
911 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
912 MapTwo
= MapTwo
.add_constraint(C
);
913 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
915 MapOne
= MapOne
.unite(MapTwo
);
916 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
919 isl::id BaseAddrId
= getScopArrayInfo()->getBasePtrId();
920 isl::space Space
= Statement
->getDomainSpace();
921 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
922 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
923 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
924 NewAccessRelation
= NewAccessRelation
.gist_domain(Statement
->getDomain());
926 // Access dimension folding might in certain cases increase the number of
927 // disjuncts in the memory access, which can possibly complicate the generated
928 // run-time checks and can lead to costly compilation.
929 if (!PollyPreciseFoldAccesses
&&
930 isl_map_n_basic_map(NewAccessRelation
.get()) >
931 isl_map_n_basic_map(AccessRelation
.get())) {
933 AccessRelation
= NewAccessRelation
;
937 /// Check if @p Expr is divisible by @p Size.
938 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
943 // Only one factor needs to be divisible.
944 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
945 for (auto *FactorExpr
: MulExpr
->operands())
946 if (isDivisible(FactorExpr
, Size
, SE
))
951 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
953 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
954 for (auto *OpExpr
: NAryExpr
->operands())
955 if (!isDivisible(OpExpr
, Size
, SE
))
960 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
961 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
962 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
963 return MulSCEV
== Expr
;
966 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
967 assert(AccessRelation
.is_null() && "AccessRelation already built");
969 // Initialize the invalid domain which describes all iterations for which the
970 // access relation is not modeled correctly.
971 isl::set StmtInvalidDomain
= getStatement()->getInvalidDomain();
972 InvalidDomain
= isl::set::empty(StmtInvalidDomain
.get_space());
974 isl::ctx Ctx
= Id
.get_ctx();
975 isl::id BaseAddrId
= SAI
->getBasePtrId();
977 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
978 buildMemIntrinsicAccessRelation();
979 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
984 // We overapproximate non-affine accesses with a possible access to the
985 // whole array. For read accesses it does not make a difference, if an
986 // access must or may happen. However, for write accesses it is important to
987 // differentiate between writes that must happen and writes that may happen.
988 if (AccessRelation
.is_null())
989 AccessRelation
= createBasicAccessMap(Statement
);
991 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
995 isl::space Space
= isl::space(Ctx
, 0, Statement
->getNumIterators(), 0);
996 AccessRelation
= isl::map::universe(Space
);
998 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
999 isl::pw_aff Affine
= getPwAff(Subscripts
[i
]);
1000 isl::map SubscriptMap
= isl::map::from_pw_aff(Affine
);
1001 AccessRelation
= AccessRelation
.flat_range_product(SubscriptMap
);
1004 Space
= Statement
->getDomainSpace();
1005 AccessRelation
= AccessRelation
.set_tuple_id(
1006 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
1007 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1009 AccessRelation
= AccessRelation
.gist_domain(Statement
->getDomain());
1012 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1013 AccessType AccType
, Value
*BaseAddress
,
1014 Type
*ElementType
, bool Affine
,
1015 ArrayRef
<const SCEV
*> Subscripts
,
1016 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1018 : Kind(Kind
), AccType(AccType
), Statement(Stmt
), InvalidDomain(nullptr),
1019 BaseAddr(BaseAddress
), ElementType(ElementType
),
1020 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1021 AccessValue(AccessValue
), IsAffine(Affine
),
1022 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1023 NewAccessRelation(nullptr), FAD(nullptr) {
1024 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1025 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1027 std::string IdName
= Stmt
->getBaseName() + Access
;
1028 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1031 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
, isl::map AccRel
)
1032 : Kind(MemoryKind::Array
), AccType(AccType
), Statement(Stmt
),
1033 InvalidDomain(nullptr), AccessRelation(nullptr),
1034 NewAccessRelation(AccRel
), FAD(nullptr) {
1035 isl::id ArrayInfoId
= NewAccessRelation
.get_tuple_id(isl::dim::out
);
1036 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1037 Sizes
.push_back(nullptr);
1038 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1039 Sizes
.push_back(SAI
->getDimensionSize(i
));
1040 ElementType
= SAI
->getElementType();
1041 BaseAddr
= SAI
->getBasePtr();
1042 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1043 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1045 std::string IdName
= Stmt
->getBaseName() + Access
;
1046 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1049 MemoryAccess::~MemoryAccess() = default;
1051 void MemoryAccess::realignParams() {
1052 isl::set Ctx
= Statement
->getParent()->getContext();
1053 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1054 AccessRelation
= AccessRelation
.gist_params(Ctx
);
1057 const std::string
MemoryAccess::getReductionOperatorStr() const {
1058 return MemoryAccess::getReductionOperatorStr(getReductionType());
1061 isl::id
MemoryAccess::getId() const { return Id
; }
1063 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1064 MemoryAccess::ReductionType RT
) {
1065 if (RT
== MemoryAccess::RT_NONE
)
1068 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1072 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1074 void MemoryAccess::print(raw_ostream
&OS
) const {
1077 OS
.indent(12) << "ReadAccess :=\t";
1080 OS
.indent(12) << "MustWriteAccess :=\t";
1083 OS
.indent(12) << "MayWriteAccess :=\t";
1087 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1090 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1094 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1095 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1096 if (hasNewAccessRelation())
1097 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1101 LLVM_DUMP_METHOD
void MemoryAccess::dump() const { print(errs()); }
1104 isl::pw_aff
MemoryAccess::getPwAff(const SCEV
*E
) {
1105 auto *Stmt
= getStatement();
1106 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1107 isl::set StmtDom
= getStatement()->getDomain();
1108 StmtDom
= StmtDom
.reset_tuple_id();
1109 isl::set NewInvalidDom
= StmtDom
.intersect(PWAC
.second
);
1110 InvalidDomain
= InvalidDomain
.unite(NewInvalidDom
);
1114 // Create a map in the size of the provided set domain, that maps from the
1115 // one element of the provided set domain to another element of the provided
1117 // The mapping is limited to all points that are equal in all but the last
1118 // dimension and for which the last dimension of the input is strict smaller
1119 // than the last dimension of the output.
1121 // getEqualAndLarger(set[i0, i1, ..., iX]):
1123 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1124 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1126 static isl::map
getEqualAndLarger(isl::space SetDomain
) {
1127 isl::space Space
= SetDomain
.map_from_set();
1128 isl::map Map
= isl::map::universe(Space
);
1129 unsigned lastDimension
= Map
.dim(isl::dim::in
) - 1;
1131 // Set all but the last dimension to be equal for the input and output
1133 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1134 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1135 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1136 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
1138 // Set the last dimension of the input to be strict smaller than the
1139 // last dimension of the output.
1141 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1142 Map
= Map
.order_lt(isl::dim::in
, lastDimension
, isl::dim::out
, lastDimension
);
1146 isl::set
MemoryAccess::getStride(isl::map Schedule
) const {
1147 isl::map AccessRelation
= getAccessRelation();
1148 isl::space Space
= Schedule
.get_space().range();
1149 isl::map NextScatt
= getEqualAndLarger(Space
);
1151 Schedule
= Schedule
.reverse();
1152 NextScatt
= NextScatt
.lexmin();
1154 NextScatt
= NextScatt
.apply_range(Schedule
);
1155 NextScatt
= NextScatt
.apply_range(AccessRelation
);
1156 NextScatt
= NextScatt
.apply_domain(Schedule
);
1157 NextScatt
= NextScatt
.apply_domain(AccessRelation
);
1159 isl::set Deltas
= NextScatt
.deltas();
1163 bool MemoryAccess::isStrideX(isl::map Schedule
, int StrideWidth
) const {
1164 isl::set Stride
, StrideX
;
1167 Stride
= getStride(Schedule
);
1168 StrideX
= isl::set::universe(Stride
.get_space());
1169 for (unsigned i
= 0; i
< StrideX
.dim(isl::dim::set
) - 1; i
++)
1170 StrideX
= StrideX
.fix_si(isl::dim::set
, i
, 0);
1171 StrideX
= StrideX
.fix_si(isl::dim::set
, StrideX
.dim(isl::dim::set
) - 1,
1173 IsStrideX
= Stride
.is_subset(StrideX
);
1178 bool MemoryAccess::isStrideZero(isl::map Schedule
) const {
1179 return isStrideX(Schedule
, 0);
1182 bool MemoryAccess::isStrideOne(isl::map Schedule
) const {
1183 return isStrideX(Schedule
, 1);
1186 void MemoryAccess::setAccessRelation(isl::map NewAccess
) {
1187 AccessRelation
= NewAccess
;
1190 void MemoryAccess::setNewAccessRelation(isl::map NewAccess
) {
1194 // Check domain space compatibility.
1195 isl::space NewSpace
= NewAccess
.get_space();
1196 isl::space NewDomainSpace
= NewSpace
.domain();
1197 isl::space OriginalDomainSpace
= getStatement()->getDomainSpace();
1198 assert(OriginalDomainSpace
.has_equal_tuples(NewDomainSpace
));
1200 // Reads must be executed unconditionally. Writes might be executed in a
1203 // Check whether there is an access for every statement instance.
1204 isl::set StmtDomain
= getStatement()->getDomain();
1206 StmtDomain
.intersect_params(getStatement()->getParent()->getContext());
1207 isl::set NewDomain
= NewAccess
.domain();
1208 assert(StmtDomain
.is_subset(NewDomain
) &&
1209 "Partial READ accesses not supported");
1212 isl::space NewAccessSpace
= NewAccess
.get_space();
1213 assert(NewAccessSpace
.has_tuple_id(isl::dim::set
) &&
1214 "Must specify the array that is accessed");
1215 isl::id NewArrayId
= NewAccessSpace
.get_tuple_id(isl::dim::set
);
1216 auto *SAI
= static_cast<ScopArrayInfo
*>(NewArrayId
.get_user());
1217 assert(SAI
&& "Must set a ScopArrayInfo");
1219 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1220 InvariantEquivClassTy
*EqClass
=
1221 getStatement()->getParent()->lookupInvariantEquivClass(
1224 "Access functions to indirect arrays must have an invariant and "
1225 "hoisted base pointer");
1228 // Check whether access dimensions correspond to number of dimensions of the
1230 auto Dims
= SAI
->getNumberOfDimensions();
1231 assert(NewAccessSpace
.dim(isl::dim::set
) == Dims
&&
1232 "Access dims must match array dims");
1235 NewAccess
= NewAccess
.gist_domain(getStatement()->getDomain());
1236 NewAccessRelation
= NewAccess
;
1239 bool MemoryAccess::isLatestPartialAccess() const {
1240 isl::set StmtDom
= getStatement()->getDomain();
1241 isl::set AccDom
= getLatestAccessRelation().domain();
1243 return !StmtDom
.is_subset(AccDom
);
1246 //===----------------------------------------------------------------------===//
1248 isl::map
ScopStmt::getSchedule() const {
1249 isl::set Domain
= getDomain();
1250 if (Domain
.is_empty())
1251 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1252 auto Schedule
= getParent()->getSchedule();
1255 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1256 if (Schedule
.is_empty())
1257 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1258 isl::map M
= M
.from_union_map(Schedule
);
1260 M
= M
.gist_domain(Domain
);
1265 void ScopStmt::restrictDomain(isl::set NewDomain
) {
1266 assert(NewDomain
.is_subset(Domain
) &&
1267 "New domain is not a subset of old domain!");
1271 void ScopStmt::addAccess(MemoryAccess
*Access
, bool Prepend
) {
1272 Instruction
*AccessInst
= Access
->getAccessInstruction();
1274 if (Access
->isArrayKind()) {
1275 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1276 MAL
.emplace_front(Access
);
1277 } else if (Access
->isValueKind() && Access
->isWrite()) {
1278 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1279 assert(!ValueWrites
.lookup(AccessVal
));
1281 ValueWrites
[AccessVal
] = Access
;
1282 } else if (Access
->isValueKind() && Access
->isRead()) {
1283 Value
*AccessVal
= Access
->getAccessValue();
1284 assert(!ValueReads
.lookup(AccessVal
));
1286 ValueReads
[AccessVal
] = Access
;
1287 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1288 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1289 assert(!PHIWrites
.lookup(PHI
));
1291 PHIWrites
[PHI
] = Access
;
1292 } else if (Access
->isAnyPHIKind() && Access
->isRead()) {
1293 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1294 assert(!PHIReads
.lookup(PHI
));
1296 PHIReads
[PHI
] = Access
;
1300 MemAccs
.insert(MemAccs
.begin(), Access
);
1303 MemAccs
.push_back(Access
);
1306 void ScopStmt::realignParams() {
1307 for (MemoryAccess
*MA
: *this)
1308 MA
->realignParams();
1310 isl::set Ctx
= Parent
.getContext();
1311 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1312 Domain
= Domain
.gist_params(Ctx
);
1315 /// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
1316 static isl::set
collectBoundedParts(isl::set S
) {
1317 isl::set BoundedParts
= isl::set::empty(S
.get_space());
1318 S
.foreach_basic_set([&BoundedParts
](isl::basic_set BSet
) -> isl::stat
{
1319 if (BSet
.is_bounded()) {
1320 BoundedParts
= BoundedParts
.unite(isl::set(BSet
));
1322 return isl::stat::ok
;
1324 return BoundedParts
;
1327 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1329 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1330 /// both with regards to the dimension @p Dim.
1331 static std::pair
<isl::set
, isl::set
> partitionSetParts(isl::set S
,
1333 for (unsigned u
= 0, e
= S
.n_dim(); u
< e
; u
++)
1334 S
= S
.lower_bound_si(isl::dim::set
, u
, 0);
1336 unsigned NumDimsS
= S
.n_dim();
1337 isl::set OnlyDimS
= S
;
1339 // Remove dimensions that are greater than Dim as they are not interesting.
1340 assert(NumDimsS
>= Dim
+ 1);
1341 OnlyDimS
= OnlyDimS
.project_out(isl::dim::set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1343 // Create artificial parametric upper bounds for dimensions smaller than Dim
1344 // as we are not interested in them.
1345 OnlyDimS
= OnlyDimS
.insert_dims(isl::dim::param
, 0, Dim
);
1347 for (unsigned u
= 0; u
< Dim
; u
++) {
1348 isl::constraint C
= isl::constraint::alloc_inequality(
1349 isl::local_space(OnlyDimS
.get_space()));
1350 C
= C
.set_coefficient_si(isl::dim::param
, u
, 1);
1351 C
= C
.set_coefficient_si(isl::dim::set
, u
, -1);
1352 OnlyDimS
= OnlyDimS
.add_constraint(C
);
1355 // Collect all bounded parts of OnlyDimS.
1356 isl::set BoundedParts
= collectBoundedParts(OnlyDimS
);
1358 // Create the dimensions greater than Dim again.
1360 BoundedParts
.insert_dims(isl::dim::set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1362 // Remove the artificial upper bound parameters again.
1363 BoundedParts
= BoundedParts
.remove_dims(isl::dim::param
, 0, Dim
);
1365 isl::set UnboundedParts
= S
.subtract(BoundedParts
);
1366 return std::make_pair(UnboundedParts
, BoundedParts
);
1369 /// Create the conditions under which @p L @p Pred @p R is true.
1370 static isl::set
buildConditionSet(ICmpInst::Predicate Pred
, isl::pw_aff L
,
1373 case ICmpInst::ICMP_EQ
:
1375 case ICmpInst::ICMP_NE
:
1377 case ICmpInst::ICMP_SLT
:
1379 case ICmpInst::ICMP_SLE
:
1381 case ICmpInst::ICMP_SGT
:
1383 case ICmpInst::ICMP_SGE
:
1385 case ICmpInst::ICMP_ULT
:
1387 case ICmpInst::ICMP_UGT
:
1389 case ICmpInst::ICMP_ULE
:
1391 case ICmpInst::ICMP_UGE
:
1394 llvm_unreachable("Non integer predicate not supported");
1398 /// Compute the isl representation for the SCEV @p E in this BB.
1400 /// @param S The Scop in which @p BB resides in.
1401 /// @param BB The BB for which isl representation is to be
1403 /// @param InvalidDomainMap A map of BB to their invalid domains.
1404 /// @param E The SCEV that should be translated.
1405 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
1407 /// Note that this function will also adjust the invalid context accordingly.
1409 __isl_give isl_pw_aff
*
1410 getPwAff(Scop
&S
, BasicBlock
*BB
,
1411 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
, const SCEV
*E
,
1412 bool NonNegative
= false) {
1413 PWACtx PWAC
= S
.getPwAff(E
, BB
, NonNegative
);
1414 InvalidDomainMap
[BB
] = InvalidDomainMap
[BB
].unite(PWAC
.second
);
1415 return PWAC
.first
.release();
1418 /// Build the conditions sets for the switch @p SI in the @p Domain.
1420 /// This will fill @p ConditionSets with the conditions under which control
1421 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1422 /// have as many elements as @p SI has successors.
1423 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, SwitchInst
*SI
, Loop
*L
,
1424 __isl_keep isl_set
*Domain
,
1425 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1426 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1427 Value
*Condition
= getConditionFromTerminator(SI
);
1428 assert(Condition
&& "No condition for switch");
1430 ScalarEvolution
&SE
= *S
.getSE();
1431 isl_pw_aff
*LHS
, *RHS
;
1432 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEVAtScope(Condition
, L
));
1434 unsigned NumSuccessors
= SI
->getNumSuccessors();
1435 ConditionSets
.resize(NumSuccessors
);
1436 for (auto &Case
: SI
->cases()) {
1437 unsigned Idx
= Case
.getSuccessorIndex();
1438 ConstantInt
*CaseValue
= Case
.getCaseValue();
1440 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEV(CaseValue
));
1441 isl_set
*CaseConditionSet
=
1442 buildConditionSet(ICmpInst::ICMP_EQ
, isl::manage_copy(LHS
),
1445 ConditionSets
[Idx
] = isl_set_coalesce(
1446 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1449 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1450 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1451 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1453 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1454 ConditionSets
[0] = isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
);
1456 isl_pw_aff_free(LHS
);
1461 /// Build condition sets for unsigned ICmpInst(s).
1462 /// Special handling is required for unsigned operands to ensure that if
1463 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1464 /// it should wrap around.
1466 /// @param IsStrictUpperBound holds information on the predicate relation
1467 /// between TestVal and UpperBound, i.e,
1468 /// TestVal < UpperBound OR TestVal <= UpperBound
1469 __isl_give isl_set
*
1470 buildUnsignedConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1471 __isl_keep isl_set
*Domain
, const SCEV
*SCEV_TestVal
,
1472 const SCEV
*SCEV_UpperBound
,
1473 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1474 bool IsStrictUpperBound
) {
1475 // Do not take NonNeg assumption on TestVal
1476 // as it might have MSB (Sign bit) set.
1477 isl_pw_aff
*TestVal
= getPwAff(S
, BB
, InvalidDomainMap
, SCEV_TestVal
, false);
1478 // Take NonNeg assumption on UpperBound.
1479 isl_pw_aff
*UpperBound
=
1480 getPwAff(S
, BB
, InvalidDomainMap
, SCEV_UpperBound
, true);
1484 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1485 isl_pw_aff_get_domain_space(TestVal
))),
1486 isl_pw_aff_copy(TestVal
));
1489 if (IsStrictUpperBound
)
1490 // TestVal < UpperBound
1491 Second
= isl_pw_aff_lt_set(TestVal
, UpperBound
);
1493 // TestVal <= UpperBound
1494 Second
= isl_pw_aff_le_set(TestVal
, UpperBound
);
1496 isl_set
*ConsequenceCondSet
= isl_set_intersect(First
, Second
);
1497 return ConsequenceCondSet
;
1500 /// Build the conditions sets for the branch condition @p Condition in
1503 /// This will fill @p ConditionSets with the conditions under which control
1504 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1505 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1506 /// context under which @p Condition is true/false will be returned as the
1507 /// new elements of @p ConditionSets.
1508 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1509 TerminatorInst
*TI
, Loop
*L
, __isl_keep isl_set
*Domain
,
1510 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1511 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1512 ScalarEvolution
&SE
= *S
.getSE();
1513 isl_set
*ConsequenceCondSet
= nullptr;
1515 if (auto Load
= dyn_cast
<LoadInst
>(Condition
)) {
1516 const SCEV
*LHSSCEV
= SE
.getSCEVAtScope(Load
, L
);
1517 const SCEV
*RHSSCEV
= SE
.getZero(LHSSCEV
->getType());
1518 bool NonNeg
= false;
1519 isl_pw_aff
*LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LHSSCEV
, NonNeg
);
1520 isl_pw_aff
*RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RHSSCEV
, NonNeg
);
1521 ConsequenceCondSet
= buildConditionSet(ICmpInst::ICMP_SLE
, isl::manage(LHS
),
1524 } else if (auto *PHI
= dyn_cast
<PHINode
>(Condition
)) {
1525 auto *Unique
= dyn_cast
<ConstantInt
>(
1526 getUniqueNonErrorValue(PHI
, &S
.getRegion(), *S
.getLI(), *S
.getDT()));
1528 if (Unique
->isZero())
1529 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1531 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1532 } else if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1533 if (CCond
->isZero())
1534 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1536 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1537 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1538 auto Opcode
= BinOp
->getOpcode();
1539 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1541 bool Valid
= buildConditionSets(S
, BB
, BinOp
->getOperand(0), TI
, L
, Domain
,
1542 InvalidDomainMap
, ConditionSets
) &&
1543 buildConditionSets(S
, BB
, BinOp
->getOperand(1), TI
, L
, Domain
,
1544 InvalidDomainMap
, ConditionSets
);
1546 while (!ConditionSets
.empty())
1547 isl_set_free(ConditionSets
.pop_back_val());
1551 isl_set_free(ConditionSets
.pop_back_val());
1552 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1553 isl_set_free(ConditionSets
.pop_back_val());
1554 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1556 if (Opcode
== Instruction::And
)
1557 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1559 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1561 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1563 "Condition of exiting branch was neither constant nor ICmp!");
1565 LoopInfo
&LI
= *S
.getLI();
1566 DominatorTree
&DT
= *S
.getDT();
1567 Region
&R
= S
.getRegion();
1569 isl_pw_aff
*LHS
, *RHS
;
1570 // For unsigned comparisons we assumed the signed bit of neither operand
1571 // to be set. The comparison is equal to a signed comparison under this
1573 bool NonNeg
= ICond
->isUnsigned();
1574 const SCEV
*LeftOperand
= SE
.getSCEVAtScope(ICond
->getOperand(0), L
),
1575 *RightOperand
= SE
.getSCEVAtScope(ICond
->getOperand(1), L
);
1577 LeftOperand
= tryForwardThroughPHI(LeftOperand
, R
, SE
, LI
, DT
);
1578 RightOperand
= tryForwardThroughPHI(RightOperand
, R
, SE
, LI
, DT
);
1580 switch (ICond
->getPredicate()) {
1581 case ICmpInst::ICMP_ULT
:
1582 ConsequenceCondSet
=
1583 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1584 RightOperand
, InvalidDomainMap
, true);
1586 case ICmpInst::ICMP_ULE
:
1587 ConsequenceCondSet
=
1588 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1589 RightOperand
, InvalidDomainMap
, false);
1591 case ICmpInst::ICMP_UGT
:
1592 ConsequenceCondSet
=
1593 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1594 LeftOperand
, InvalidDomainMap
, true);
1596 case ICmpInst::ICMP_UGE
:
1597 ConsequenceCondSet
=
1598 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1599 LeftOperand
, InvalidDomainMap
, false);
1602 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LeftOperand
, NonNeg
);
1603 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RightOperand
, NonNeg
);
1604 ConsequenceCondSet
= buildConditionSet(ICond
->getPredicate(),
1605 isl::manage(LHS
), isl::manage(RHS
))
1611 // If no terminator was given we are only looking for parameter constraints
1612 // under which @p Condition is true/false.
1614 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1615 assert(ConsequenceCondSet
);
1616 ConsequenceCondSet
= isl_set_coalesce(
1617 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1619 isl_set
*AlternativeCondSet
= nullptr;
1621 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1624 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1625 isl_set_copy(ConsequenceCondSet
));
1627 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1631 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc(),
1632 TI
? TI
->getParent() : nullptr /* BasicBlock */);
1633 isl_set_free(AlternativeCondSet
);
1634 isl_set_free(ConsequenceCondSet
);
1638 ConditionSets
.push_back(ConsequenceCondSet
);
1639 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1644 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1646 /// This will fill @p ConditionSets with the conditions under which control
1647 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1648 /// have as many elements as @p TI has successors.
1649 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, TerminatorInst
*TI
, Loop
*L
,
1650 __isl_keep isl_set
*Domain
,
1651 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1652 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1653 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1654 return buildConditionSets(S
, BB
, SI
, L
, Domain
, InvalidDomainMap
,
1657 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1659 if (TI
->getNumSuccessors() == 1) {
1660 ConditionSets
.push_back(isl_set_copy(Domain
));
1664 Value
*Condition
= getConditionFromTerminator(TI
);
1665 assert(Condition
&& "No condition for Terminator");
1667 return buildConditionSets(S
, BB
, Condition
, TI
, L
, Domain
, InvalidDomainMap
,
1671 ScopStmt::ScopStmt(Scop
&parent
, Region
&R
, StringRef Name
,
1672 Loop
*SurroundingLoop
,
1673 std::vector
<Instruction
*> EntryBlockInstructions
)
1674 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), R(&R
),
1675 Build(nullptr), BaseName(Name
), SurroundingLoop(SurroundingLoop
),
1676 Instructions(EntryBlockInstructions
) {}
1678 ScopStmt::ScopStmt(Scop
&parent
, BasicBlock
&bb
, StringRef Name
,
1679 Loop
*SurroundingLoop
,
1680 std::vector
<Instruction
*> Instructions
)
1681 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(&bb
),
1682 Build(nullptr), BaseName(Name
), SurroundingLoop(SurroundingLoop
),
1683 Instructions(Instructions
) {}
1685 ScopStmt::ScopStmt(Scop
&parent
, isl::map SourceRel
, isl::map TargetRel
,
1687 : Parent(parent
), InvalidDomain(nullptr), Domain(NewDomain
),
1689 BaseName
= getIslCompatibleName("CopyStmt_", "",
1690 std::to_string(parent
.getCopyStmtsNum()));
1691 isl::id Id
= isl::id::alloc(getIslCtx(), getBaseName(), this);
1692 Domain
= Domain
.set_tuple_id(Id
);
1693 TargetRel
= TargetRel
.set_tuple_id(isl::dim::in
, Id
);
1695 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE
, TargetRel
);
1696 parent
.addAccessFunction(Access
);
1698 SourceRel
= SourceRel
.set_tuple_id(isl::dim::in
, Id
);
1699 Access
= new MemoryAccess(this, MemoryAccess::AccessType::READ
, SourceRel
);
1700 parent
.addAccessFunction(Access
);
1704 ScopStmt::~ScopStmt() = default;
1706 std::string
ScopStmt::getDomainStr() const { return Domain
.to_str(); }
1708 std::string
ScopStmt::getScheduleStr() const {
1709 auto *S
= getSchedule().release();
1712 auto Str
= stringFromIslObj(S
);
1717 void ScopStmt::setInvalidDomain(isl::set ID
) { InvalidDomain
= ID
; }
1719 BasicBlock
*ScopStmt::getEntryBlock() const {
1721 return getBasicBlock();
1722 return getRegion()->getEntry();
1725 unsigned ScopStmt::getNumIterators() const { return NestLoops
.size(); }
1727 const char *ScopStmt::getBaseName() const { return BaseName
.c_str(); }
1729 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1730 return NestLoops
[Dimension
];
1733 isl::ctx
ScopStmt::getIslCtx() const { return Parent
.getIslCtx(); }
1735 isl::set
ScopStmt::getDomain() const { return Domain
; }
1737 isl::space
ScopStmt::getDomainSpace() const { return Domain
.get_space(); }
1739 isl::id
ScopStmt::getDomainId() const { return Domain
.get_tuple_id(); }
1741 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1742 OS
<< "Instructions {\n";
1744 for (Instruction
*Inst
: Instructions
)
1745 OS
.indent(16) << *Inst
<< "\n";
1747 OS
.indent(12) << "}\n";
1750 void ScopStmt::print(raw_ostream
&OS
, bool PrintInstructions
) const {
1751 OS
<< "\t" << getBaseName() << "\n";
1752 OS
.indent(12) << "Domain :=\n";
1755 OS
.indent(16) << getDomainStr() << ";\n";
1757 OS
.indent(16) << "n/a\n";
1759 OS
.indent(12) << "Schedule :=\n";
1762 OS
.indent(16) << getScheduleStr() << ";\n";
1764 OS
.indent(16) << "n/a\n";
1766 for (MemoryAccess
*Access
: MemAccs
)
1769 if (PrintInstructions
)
1770 printInstructions(OS
.indent(12));
1773 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1774 LLVM_DUMP_METHOD
void ScopStmt::dump() const { print(dbgs(), true); }
1777 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
1778 if (MA
->isRead() && MA
->isOriginalValueKind()) {
1779 bool Found
= ValueReads
.erase(MA
->getAccessValue());
1781 assert(Found
&& "Expected access data not found");
1783 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
1784 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
1786 assert(Found
&& "Expected access data not found");
1788 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
1789 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1791 assert(Found
&& "Expected access data not found");
1793 if (MA
->isRead() && MA
->isOriginalAnyPHIKind()) {
1794 bool Found
= PHIReads
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1796 assert(Found
&& "Expected access data not found");
1800 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
1801 // Remove the memory accesses from this statement together with all scalar
1802 // accesses that were caused by it. MemoryKind::Value READs have no access
1803 // instruction, hence would not be removed by this function. However, it is
1804 // only used for invariant LoadInst accesses, its arguments are always affine,
1805 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1806 // accesses to be removed.
1807 auto Predicate
= [&](MemoryAccess
*Acc
) {
1808 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
1810 for (auto *MA
: MemAccs
) {
1811 if (Predicate(MA
)) {
1812 removeAccessData(MA
);
1813 Parent
.removeAccessData(MA
);
1816 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
1818 InstructionToAccess
.erase(MA
->getAccessInstruction());
1821 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
, bool AfterHoisting
) {
1822 if (AfterHoisting
) {
1823 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
1824 assert(MAIt
!= MemAccs
.end());
1825 MemAccs
.erase(MAIt
);
1827 removeAccessData(MA
);
1828 Parent
.removeAccessData(MA
);
1831 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
1832 if (It
!= InstructionToAccess
.end()) {
1833 It
->second
.remove(MA
);
1834 if (It
->second
.empty())
1835 InstructionToAccess
.erase(MA
->getAccessInstruction());
1839 MemoryAccess
*ScopStmt::ensureValueRead(Value
*V
) {
1840 MemoryAccess
*Access
= lookupInputAccessOf(V
);
1844 ScopArrayInfo
*SAI
=
1845 Parent
.getOrCreateScopArrayInfo(V
, V
->getType(), {}, MemoryKind::Value
);
1846 Access
= new MemoryAccess(this, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1847 true, {}, {}, V
, MemoryKind::Value
);
1848 Parent
.addAccessFunction(Access
);
1849 Access
->buildAccessRelation(SAI
);
1851 Parent
.addAccessData(Access
);
1855 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const ScopStmt
&S
) {
1856 S
.print(OS
, PollyPrintInstructions
);
1860 //===----------------------------------------------------------------------===//
/// Scop class implementation
1863 void Scop::setContext(isl::set NewContext
) {
1864 Context
= NewContext
.align_params(Context
.get_space());
1869 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1870 struct SCEVSensitiveParameterRewriter
1871 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
1872 const ValueToValueMap
&VMap
;
1875 SCEVSensitiveParameterRewriter(const ValueToValueMap
&VMap
,
1876 ScalarEvolution
&SE
)
1877 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
1879 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
1880 const ValueToValueMap
&VMap
) {
1881 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
1882 return SSPR
.visit(E
);
1885 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
1886 auto *Start
= visit(E
->getStart());
1887 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
1888 visit(E
->getStepRecurrence(SE
)),
1889 E
->getLoop(), SCEV::FlagAnyWrap
);
1890 return SE
.getAddExpr(Start
, AddRec
);
1893 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
1894 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
1895 return SE
.getUnknown(NewValue
);
1900 /// Check whether we should remap a SCEV expression.
1901 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
1902 const ValueToValueMap
&VMap
;
1903 bool FoundInside
= false;
1907 SCEVFindInsideScop(const ValueToValueMap
&VMap
, ScalarEvolution
&SE
,
1909 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
1911 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
1912 const ValueToValueMap
&VMap
, const Scop
*S
) {
1913 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
1915 return SFIS
.FoundInside
;
1918 bool follow(const SCEV
*E
) {
1919 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
1920 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
1921 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
1922 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
1923 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
1925 return !FoundInside
;
1928 bool isDone() { return FoundInside
; }
1930 } // end anonymous namespace
1932 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) const {
1933 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
1934 // doesn't like addition between an AddRec and an expression that
1935 // doesn't have a dominance relationship with it.)
1936 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
1940 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
// This table of function names is used to translate parameter names into more
// human-readable names. This makes it easier to interpret Polly analysis
// results.
1946 StringMap
<std::string
> KnownNames
= {
1947 {"_Z13get_global_idj", "global_id"},
1948 {"_Z12get_local_idj", "local_id"},
1949 {"_Z15get_global_sizej", "global_size"},
1950 {"_Z14get_local_sizej", "local_size"},
1951 {"_Z12get_work_dimv", "work_dim"},
1952 {"_Z17get_global_offsetj", "global_offset"},
1953 {"_Z12get_group_idj", "group_id"},
1954 {"_Z14get_num_groupsj", "num_groups"},
1957 static std::string
getCallParamName(CallInst
*Call
) {
1959 raw_string_ostream
OS(Result
);
1960 std::string Name
= Call
->getCalledFunction()->getName();
1962 auto Iterator
= KnownNames
.find(Name
);
1963 if (Iterator
!= KnownNames
.end())
1964 Name
= "__" + Iterator
->getValue();
1966 for (auto &Operand
: Call
->arg_operands()) {
1967 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
1968 OS
<< "_" << Op
->getValue();
1974 void Scop::createParameterId(const SCEV
*Parameter
) {
1975 assert(Parameters
.count(Parameter
));
1976 assert(!ParameterIds
.count(Parameter
));
1978 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
1980 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
1981 Value
*Val
= ValueParameter
->getValue();
1982 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
1984 if (Call
&& isConstCall(Call
)) {
1985 ParameterName
= getCallParamName(Call
);
1986 } else if (UseInstructionNames
) {
1987 // If this parameter references a specific Value and this value has a name
1988 // we use this name as it is likely to be unique and more useful than just
1991 ParameterName
= Val
->getName();
1992 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
1993 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
1994 if (LoadOrigin
->hasName()) {
1995 ParameterName
+= "_loaded_from_";
1997 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
2002 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2005 isl::id Id
= isl::id::alloc(getIslCtx(), ParameterName
,
2006 const_cast<void *>((const void *)Parameter
));
2007 ParameterIds
[Parameter
] = Id
;
2010 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2011 for (const SCEV
*Parameter
: NewParameters
) {
2012 // Normalize the SCEV to get the representing element for an invariant load.
2013 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2014 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2016 if (Parameters
.insert(Parameter
))
2017 createParameterId(Parameter
);
2021 isl::id
Scop::getIdForParam(const SCEV
*Parameter
) const {
2022 // Normalize the SCEV to get the representing element for an invariant load.
2023 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2024 return ParameterIds
.lookup(Parameter
);
2027 isl::set
Scop::addNonEmptyDomainConstraints(isl::set C
) const {
2028 isl::set DomainContext
= getDomains().params();
2029 return C
.intersect_params(DomainContext
);
2032 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2033 return DT
.dominates(BB
, getEntry());
2036 void Scop::addUserAssumptions(
2037 AssumptionCache
&AC
, DominatorTree
&DT
, LoopInfo
&LI
,
2038 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2039 for (auto &Assumption
: AC
.assumptions()) {
2040 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2041 if (!CI
|| CI
->getNumArgOperands() != 1)
2044 bool InScop
= contains(CI
);
2045 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2048 auto *L
= LI
.getLoopFor(CI
->getParent());
2049 auto *Val
= CI
->getArgOperand(0);
2050 ParameterSetTy DetectedParams
;
2051 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2053 OptimizationRemarkAnalysis(DEBUG_TYPE
, "IgnoreUserAssumption", CI
)
2054 << "Non-affine user assumption ignored.");
2058 // Collect all newly introduced parameters.
2059 ParameterSetTy NewParams
;
2060 for (auto *Param
: DetectedParams
) {
2061 Param
= extractConstantFactor(Param
, *SE
).second
;
2062 Param
= getRepresentingInvariantLoadSCEV(Param
);
2063 if (Parameters
.count(Param
))
2065 NewParams
.insert(Param
);
2068 SmallVector
<isl_set
*, 2> ConditionSets
;
2069 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2070 BasicBlock
*BB
= InScop
? CI
->getParent() : getRegion().getEntry();
2071 auto *Dom
= InScop
? DomainMap
[BB
].copy() : Context
.copy();
2072 assert(Dom
&& "Cannot propagate a nullptr.");
2073 bool Valid
= buildConditionSets(*this, BB
, Val
, TI
, L
, Dom
,
2074 InvalidDomainMap
, ConditionSets
);
2080 isl_set
*AssumptionCtx
= nullptr;
2082 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2083 isl_set_free(ConditionSets
[0]);
2085 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2086 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2089 // Project out newly introduced parameters as they are not otherwise useful.
2090 if (!NewParams
.empty()) {
2091 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2092 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2093 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2096 if (!NewParams
.count(Param
))
2100 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2103 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "UserAssumption", CI
)
2104 << "Use user assumption: " << stringFromIslObj(AssumptionCtx
));
2105 Context
= Context
.intersect(isl::manage(AssumptionCtx
));
2109 void Scop::addUserContext() {
2110 if (UserContextStr
.empty())
2113 isl::set UserContext
= isl::set(getIslCtx(), UserContextStr
.c_str());
2114 isl::space Space
= getParamSpace();
2115 if (Space
.dim(isl::dim::param
) != UserContext
.dim(isl::dim::param
)) {
2116 std::string SpaceStr
= Space
.to_str();
2117 errs() << "Error: the context provided in -polly-context has not the same "
2118 << "number of dimensions than the computed context. Due to this "
2119 << "mismatch, the -polly-context option is ignored. Please provide "
2120 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2124 for (unsigned i
= 0; i
< Space
.dim(isl::dim::param
); i
++) {
2125 std::string NameContext
= Context
.get_dim_name(isl::dim::param
, i
);
2126 std::string NameUserContext
= UserContext
.get_dim_name(isl::dim::param
, i
);
2128 if (NameContext
!= NameUserContext
) {
2129 std::string SpaceStr
= Space
.to_str();
2130 errs() << "Error: the name of dimension " << i
2131 << " provided in -polly-context "
2132 << "is '" << NameUserContext
<< "', but the name in the computed "
2133 << "context is '" << NameContext
2134 << "'. Due to this name mismatch, "
2135 << "the -polly-context option is ignored. Please provide "
2136 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2140 UserContext
= UserContext
.set_dim_id(isl::dim::param
, i
,
2141 Space
.get_dim_id(isl::dim::param
, i
));
2144 Context
= Context
.intersect(UserContext
);
2147 void Scop::buildInvariantEquivalenceClasses() {
2148 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2150 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2151 for (LoadInst
*LInst
: RIL
) {
2152 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2154 Type
*Ty
= LInst
->getType();
2155 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2157 InvEquivClassVMap
[LInst
] = ClassRep
;
2162 InvariantEquivClasses
.emplace_back(
2163 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2167 void Scop::buildContext() {
2168 isl::space Space
= isl::space::params_alloc(getIslCtx(), 0);
2169 Context
= isl::set::universe(Space
);
2170 InvalidContext
= isl::set::empty(Space
);
2171 AssumedContext
= isl::set::universe(Space
);
2174 void Scop::addParameterBounds() {
2176 for (auto *Parameter
: Parameters
) {
2177 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2178 Context
= addRangeBoundsToSet(Context
, SRange
, PDim
++, isl::dim::param
);
2182 static std::vector
<isl::id
> getFortranArrayIds(Scop::array_range Arrays
) {
2183 std::vector
<isl::id
> OutermostSizeIds
;
2184 for (auto Array
: Arrays
) {
2185 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2186 // for its outermost dimension. Fortran arrays will have this since the
2187 // outermost dimension size can be picked up from their runtime description.
2188 // TODO: actually need to check if it has a FAD, but for now this works.
2189 if (Array
->getNumberOfDimensions() > 0) {
2190 isl::pw_aff PwAff
= Array
->getDimensionSizePw(0);
2194 isl::id Id
= PwAff
.get_dim_id(isl::dim::param
, 0);
2195 assert(!Id
.is_null() &&
2196 "Invalid Id for PwAff expression in Fortran array");
2197 OutermostSizeIds
.push_back(Id
);
2200 return OutermostSizeIds
;
2203 // The FORTRAN array size parameters are known to be non-negative.
2204 static isl::set
boundFortranArrayParams(isl::set Context
,
2205 Scop::array_range Arrays
) {
2206 std::vector
<isl::id
> OutermostSizeIds
;
2207 OutermostSizeIds
= getFortranArrayIds(Arrays
);
2209 for (isl::id Id
: OutermostSizeIds
) {
2210 int dim
= Context
.find_dim_by_id(isl::dim::param
, Id
);
2211 Context
= Context
.lower_bound_si(isl::dim::param
, dim
, 0);
2217 void Scop::realignParams() {
2218 if (PollyIgnoreParamBounds
)
2221 // Add all parameters into a common model.
2222 isl::space Space
= getFullParamSpace();
2224 // Align the parameters of all data structures to the model.
2225 Context
= Context
.align_params(Space
);
2227 // Bound the size of the fortran array dimensions.
2228 Context
= boundFortranArrayParams(Context
, arrays());
2230 // As all parameters are known add bounds to them.
2231 addParameterBounds();
2233 for (ScopStmt
&Stmt
: *this)
2234 Stmt
.realignParams();
2235 // Simplify the schedule according to the context too.
2236 Schedule
= Schedule
.gist_domain_params(getContext());
2239 static isl::set
simplifyAssumptionContext(isl::set AssumptionContext
,
2241 // If we have modeled all blocks in the SCoP that have side effects we can
2242 // simplify the context with the constraints that are needed for anything to
2243 // be executed at all. However, if we have error blocks in the SCoP we already
2244 // assumed some parameter combinations cannot occur and removed them from the
2245 // domains, thus we cannot use the remaining domain to simplify the
2247 if (!S
.hasErrorBlock()) {
2248 auto DomainParameters
= S
.getDomains().params();
2249 AssumptionContext
= AssumptionContext
.gist_params(DomainParameters
);
2252 AssumptionContext
= AssumptionContext
.gist_params(S
.getContext());
2253 return AssumptionContext
;
2256 void Scop::simplifyContexts() {
2257 // The parameter constraints of the iteration domains give us a set of
2258 // constraints that need to hold for all cases where at least a single
2259 // statement iteration is executed in the whole scop. We now simplify the
2260 // assumed context under the assumption that such constraints hold and at
2261 // least a single statement iteration is executed. For cases where no
2262 // statement instances are executed, the assumptions we have taken about
2263 // the executed code do not matter and can be changed.
2265 // WARNING: This only holds if the assumptions we have taken do not reduce
2266 // the set of statement instances that are executed. Otherwise we
2267 // may run into a case where the iteration domains suggest that
2268 // for a certain set of parameter constraints no code is executed,
2269 // but in the original program some computation would have been
2270 // performed. In such a case, modifying the run-time conditions and
2271 // possibly influencing the run-time check may cause certain scops
2272 // to not be executed.
2276 // When delinearizing the following code:
2278 // for (long i = 0; i < 100; i++)
2279 // for (long j = 0; j < m; j++)
2282 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2283 // otherwise we would access out of bound data. Now, knowing that code is
2284 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2285 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2286 InvalidContext
= InvalidContext
.align_params(getParamSpace());
2289 /// Add the minimal/maximal access in @p Set to @p User.
2291 buildMinMaxAccess(isl::set Set
, Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2292 isl::pw_multi_aff MinPMA
, MaxPMA
;
2293 isl::pw_aff LastDimAff
;
2297 Set
= Set
.remove_divs();
2298 polly::simplify(Set
);
2300 if (Set
.n_basic_set() > RunTimeChecksMaxAccessDisjuncts
)
2301 Set
= Set
.simple_hull();
2303 // Restrict the number of parameters involved in the access as the lexmin/
2304 // lexmax computation will take too long if this number is high.
2306 // Experiments with a simple test case using an i7 4800MQ:
2308 // #Parameters involved | Time (in sec)
2317 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2318 unsigned InvolvedParams
= 0;
2319 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2320 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2323 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2324 return isl::stat::error
;
2327 MinPMA
= Set
.lexmin_pw_multi_aff();
2328 MaxPMA
= Set
.lexmax_pw_multi_aff();
2330 MinPMA
= MinPMA
.coalesce();
2331 MaxPMA
= MaxPMA
.coalesce();
2333 // Adjust the last dimension of the maximal access by one as we want to
2334 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2335 // we test during code generation might now point after the end of the
2336 // allocated array but we will never dereference it anyway.
2337 assert((!MaxPMA
|| MaxPMA
.dim(isl::dim::out
)) &&
2338 "Assumed at least one output dimension");
2340 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2341 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2342 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2343 OneAff
= OneAff
.add_constant_si(1);
2344 LastDimAff
= LastDimAff
.add(OneAff
);
2345 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2347 if (!MinPMA
|| !MaxPMA
)
2348 return isl::stat::error
;
2350 MinMaxAccesses
.push_back(std::make_pair(MinPMA
, MaxPMA
));
2352 return isl::stat::ok
;
2355 static isl::set
getAccessDomain(MemoryAccess
*MA
) {
2356 isl::set Domain
= MA
->getStatement()->getDomain();
2357 Domain
= Domain
.project_out(isl::dim::set
, 0, Domain
.n_dim());
2358 return Domain
.reset_tuple_id();
2361 /// Wrapper function to calculate minimal/maximal accesses to each array.
2362 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2363 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2364 MinMaxAccesses
.reserve(AliasGroup
.size());
2366 isl::union_set Domains
= S
.getDomains();
2367 isl::union_map Accesses
= isl::union_map::empty(S
.getParamSpace());
2369 for (MemoryAccess
*MA
: AliasGroup
)
2370 Accesses
= Accesses
.add_map(MA
->getAccessRelation());
2372 Accesses
= Accesses
.intersect_domain(Domains
);
2373 isl::union_set Locations
= Accesses
.range();
2375 auto Lambda
= [&MinMaxAccesses
, &S
](isl::set Set
) -> isl::stat
{
2376 return buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2378 return Locations
.foreach_set(Lambda
) == isl::stat::ok
;
2381 /// Helper to treat non-affine regions and basic blocks the same.
2385 /// Return the block that is the representing block for @p RN.
2386 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2387 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2388 : RN
->getNodeAs
<BasicBlock
>();
2391 /// Return the @p idx'th block that is executed after @p RN.
2392 static inline BasicBlock
*
2393 getRegionNodeSuccessor(RegionNode
*RN
, TerminatorInst
*TI
, unsigned idx
) {
2394 if (RN
->isSubRegion()) {
2396 return RN
->getNodeAs
<Region
>()->getExit();
2398 return TI
->getSuccessor(idx
);
2401 /// Return the smallest loop surrounding @p RN.
2402 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2403 if (!RN
->isSubRegion()) {
2404 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2405 Loop
*L
= LI
.getLoopFor(BB
);
2407 // Unreachable statements are not considered to belong to a LLVM loop, as
2408 // they are not part of an actual loop in the control flow graph.
2409 // Nevertheless, we handle certain unreachable statements that are common
2410 // when modeling run-time bounds checks as being part of the loop to be
2411 // able to model them and to later eliminate the run-time bounds checks.
2413 // Specifically, for basic blocks that terminate in an unreachable and
2414 // where the immediate predecessor is part of a loop, we assume these
2415 // basic blocks belong to the loop the predecessor belongs to. This
2416 // allows us to model the following code.
2418 // for (i = 0; i < N; i++) {
2420 // abort(); <- this abort might be translated to an
2425 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2426 L
= LI
.getLoopFor(BB
->getPrevNode());
2430 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2431 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2432 while (L
&& NonAffineSubRegion
->contains(L
))
2433 L
= L
->getParentLoop();
2437 /// Get the number of blocks in @p L.
2439 /// The number of blocks in a loop are the number of basic blocks actually
2440 /// belonging to the loop, as well as all single basic blocks that the loop
2441 /// exits to and which terminate in an unreachable instruction. We do not
2442 /// allow such basic blocks in the exit of a scop, hence they belong to the
2443 /// scop and represent run-time conditions which we want to model and
2444 /// subsequently speculate away.
2446 /// @see getRegionNodeLoop for additional details.
2447 unsigned getNumBlocksInLoop(Loop
*L
) {
2448 unsigned NumBlocks
= L
->getNumBlocks();
2449 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
2450 L
->getExitBlocks(ExitBlocks
);
2452 for (auto ExitBlock
: ExitBlocks
) {
2453 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
2459 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2460 if (!RN
->isSubRegion())
2463 Region
*R
= RN
->getNodeAs
<Region
>();
2464 return std::distance(R
->block_begin(), R
->block_end());
2467 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2468 const DominatorTree
&DT
) {
2469 if (!RN
->isSubRegion())
2470 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2471 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2472 if (isErrorBlock(*BB
, R
, LI
, DT
))
2479 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2480 return getDomainConditions(Stmt
->getEntryBlock());
2483 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2484 auto DIt
= DomainMap
.find(BB
);
2485 if (DIt
!= DomainMap
.end())
2486 return DIt
->getSecond();
2488 auto &RI
= *R
.getRegionInfo();
2489 auto *BBR
= RI
.getRegionFor(BB
);
2490 while (BBR
->getEntry() == BB
)
2491 BBR
= BBR
->getParent();
2492 return getDomainConditions(BBR
->getEntry());
2495 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2496 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2497 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2498 auto *EntryBB
= R
->getEntry();
2499 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2500 int LD
= getRelativeLoopDepth(L
);
2501 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD
+ 1));
2504 L
= L
->getParentLoop();
2507 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2508 DomainMap
[EntryBB
] = isl::manage(S
);
2510 if (IsOnlyNonAffineRegion
)
2511 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2513 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2516 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2519 // Error blocks and blocks dominated by them have been assumed to never be
2520 // executed. Representing them in the Scop does not add any value. In fact,
2521 // it is likely to cause issues during construction of the ScopStmts. The
2522 // contents of error blocks have not been verified to be expressible and
2523 // will cause problems when building up a ScopStmt for them.
2524 // Furthermore, basic blocks dominated by error blocks may reference
2525 // instructions in the error block which, if the error block is not modeled,
2526 // can themselves not be constructed properly. To this end we will replace
2527 // the domains of error blocks and those only reachable via error blocks
2528 // with an empty set. Additionally, we will record for each block under which
2529 // parameter combination it would be reached via an error block in its
2530 // InvalidDomain. This information is needed during load hoisting.
2531 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
2537 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2538 /// to be compatible to domains constructed for loop @p NewL.
2540 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2541 /// edge from @p OldL to @p NewL.
2542 static isl::set
adjustDomainDimensions(Scop
&S
, isl::set Dom
, Loop
*OldL
, Loop
2544 // If the loops are the same there is nothing to do.
2548 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2549 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2550 // If both loops are non-affine loops there is nothing to do.
2551 if (OldDepth
== -1 && NewDepth
== -1)
2554 // Distinguish three cases:
2555 // 1) The depth is the same but the loops are not.
2556 // => One loop was left one was entered.
2557 // 2) The depth increased from OldL to NewL.
2558 // => One loop was entered, none was left.
2559 // 3) The depth decreased from OldL to NewL.
2560 // => Loops were left were difference of the depths defines how many.
2561 if (OldDepth
== NewDepth
) {
2562 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2563 Dom
= Dom
.project_out(isl::dim::set
, NewDepth
, 1);
2564 Dom
= Dom
.add_dims(isl::dim::set
, 1);
2565 } else if (OldDepth
< NewDepth
) {
2566 assert(OldDepth
+ 1 == NewDepth
);
2567 auto &R
= S
.getRegion();
2569 assert(NewL
->getParentLoop() == OldL
||
2570 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2571 Dom
= Dom
.add_dims(isl::dim::set
, 1);
2573 assert(OldDepth
> NewDepth
);
2574 int Diff
= OldDepth
- NewDepth
;
2575 int NumDim
= Dom
.n_dim();
2576 assert(NumDim
>= Diff
);
2577 Dom
= Dom
.project_out(isl::dim::set
, NumDim
- Diff
, Diff
);
2583 bool Scop::propagateInvalidStmtDomains(
2584 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2585 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2586 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2587 for (auto *RN
: RTraversal
) {
2589 // Recurse for affine subregions but go on for basic blocks and non-affine
2591 if (RN
->isSubRegion()) {
2592 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2593 if (!isNonAffineSubRegion(SubRegion
)) {
2594 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2599 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2600 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2601 isl::set
&Domain
= DomainMap
[BB
];
2602 assert(Domain
&& "Cannot propagate a nullptr");
2604 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2606 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2608 if (!IsInvalidBlock
) {
2609 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2611 InvalidDomain
= Domain
;
2612 isl::set DomPar
= Domain
.params();
2613 recordAssumption(ERRORBLOCK
, DomPar
, BB
->getTerminator()->getDebugLoc(),
2618 if (InvalidDomain
.is_empty()) {
2619 InvalidDomainMap
[BB
] = InvalidDomain
;
2623 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2624 auto *TI
= BB
->getTerminator();
2625 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2626 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2627 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2629 // Skip successors outside the SCoP.
2630 if (!contains(SuccBB
))
2634 if (DT
.dominates(SuccBB
, BB
))
2637 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2639 auto AdjustedInvalidDomain
= adjustDomainDimensions(*this, InvalidDomain
,
2640 BBLoop
, SuccBBLoop
);
2642 isl::set SuccInvalidDomain
= InvalidDomainMap
[SuccBB
];
2643 SuccInvalidDomain
= SuccInvalidDomain
.unite(AdjustedInvalidDomain
);
2644 SuccInvalidDomain
= SuccInvalidDomain
.coalesce();
2645 unsigned NumConjucts
= SuccInvalidDomain
.n_basic_set();
2647 InvalidDomainMap
[SuccBB
] = SuccInvalidDomain
;
2649 // Check if the maximal number of domain disjunctions was reached.
2650 // In case this happens we will bail.
2651 if (NumConjucts
< MaxDisjunctsInDomain
)
2654 InvalidDomainMap
.erase(BB
);
2655 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2659 InvalidDomainMap
[BB
] = InvalidDomain
;
// Propagate the domain of a region entry block @p BB to the exit block of
// that region: if BB is the entry of a region contained in this SCoP (and no
// loop backedge inside the region can prevent the exit from executing), the
// adjusted domain of BB is united into DomainMap[ExitBB], the exit's invalid
// domain is seeded empty, and ExitBB is recorded in FinishedExitBlocks.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines (gaps in the numbering, e.g. early returns/continues) remain
// in this block; verify against the upstream Polly source before editing.
2665 void Scop::propagateDomainConstraintsToRegionExit(
2666 BasicBlock
*BB
, Loop
*BBLoop
,
2667 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2668 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2669 // Check if the block @p BB is the entry of a region. If so we propagate it's
2670 // domain to the exit block of the region. Otherwise we are done.
2671 auto *RI
= R
.getRegionInfo();
2672 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2673 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2674 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2677 // Do not propagate the domain if there is a loop backedge inside the region
2678 // that would prevent the exit block from being executed.
// Walk up the loop nest as long as the loop is inside the SCoP, checking
// whether any latch other than BB itself lies inside the region.
2680 while (L
&& contains(L
)) {
2681 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2682 BBLoop
->getLoopLatches(LatchBBs
);
2683 for (auto *LatchBB
: LatchBBs
)
2684 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2686 L
= L
->getParentLoop();
2689 isl::set Domain
= DomainMap
[BB
];
2690 assert(Domain
&& "Cannot propagate a nullptr");
2692 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2694 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2695 // adjust the domain before we can propagate it.
2696 isl::set AdjustedDomain
= adjustDomainDimensions(*this, Domain
, BBLoop
,
2698 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2700 // If the exit domain is not yet created we set it otherwise we "add" the
2702 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2704 // Initialize the invalid domain.
2705 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2707 FinishedExitBlocks
.insert(ExitBB
);
// Build the iteration domain of every block/non-affine subregion inside @p R
// by pushing each node's domain to its successors, intersected with the
// branch condition that selects that successor. Returns false (presumably —
// the return statements fall in elided lines; TODO confirm) when complexity
// limits are hit or condition sets cannot be built.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines remain; verify against the upstream Polly source.
2710 bool Scop::buildDomainsWithBranchConstraints(
2711 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2712 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2713 // To create the domain for each block in R we iterate over all blocks and
2714 // subregions in R and propagate the conditions under which the current region
2715 // element is executed. To this end we iterate in reverse post order over R as
2716 // it ensures that we first visit all predecessors of a region node (either a
2717 // basic block or a subregion) before we visit the region node itself.
2718 // Initially, only the domain for the SCoP region entry block is set and from
2719 // there we propagate the current domain to all successors, however we add the
2720 // condition that the successor is actually executed next.
2721 // As we are only interested in non-loop carried constraints here we can
2722 // simply skip loop back edges.
2724 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2725 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2726 for (auto *RN
: RTraversal
) {
2727 // Recurse for affine subregions but go on for basic blocks and non-affine
2729 if (RN
->isSubRegion()) {
2730 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2731 if (!isNonAffineSubRegion(SubRegion
)) {
2732 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
2739 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
2740 HasErrorBlock
= true;
2742 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2743 TerminatorInst
*TI
= BB
->getTerminator();
2745 if (isa
<UnreachableInst
>(TI
))
2748 isl::set Domain
= DomainMap
.lookup(BB
);
// Track the deepest loop nest seen (isl dimension count of the domain).
2751 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
2753 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2754 // Propagate the domain from BB directly to blocks that have a superset
2755 // domain, at the moment only region exit nodes of regions that start in BB.
2756 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
2759 // If all successors of BB have been set a domain through the propagation
2760 // above we do not need to build condition sets but can just skip this
2761 // block. However, it is important to note that this is a local property
2762 // with regards to the region @p R. To this end FinishedExitBlocks is a
2764 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
2765 return FinishedExitBlocks
.count(SuccBB
);
2767 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
2770 // Build the condition sets for the successor nodes of the current region
2771 // node. If it is a non-affine subregion we will always execute the single
2772 // exit node, hence the single entry node domain is the condition set. For
2773 // basic blocks we use the helper function buildConditionSets.
2774 SmallVector
<isl_set
*, 8> ConditionSets
;
2775 if (RN
->isSubRegion())
2776 ConditionSets
.push_back(Domain
.copy());
2777 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
2778 InvalidDomainMap
, ConditionSets
))
2781 // Now iterate over the successors and set their initial domain based on
2782 // their condition set. We skip back edges here and have to be careful when
2783 // we leave a loop not to keep constraints over a dimension that doesn't
2785 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
2786 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
2787 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
2788 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2790 // Skip blocks outside the region.
2791 if (!contains(SuccBB
))
2794 // If we propagate the domain of some block to "SuccBB" we do not have to
2795 // adjust the domain.
2796 if (FinishedExitBlocks
.count(SuccBB
))
// Skip back edges (successor dominating its predecessor).
2800 if (DT
.dominates(SuccBB
, BB
))
2803 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2805 CondSet
= adjustDomainDimensions(*this, CondSet
, BBLoop
, SuccBBLoop
);
2807 // Set the domain for the successor or merge it with an existing domain in
2808 // case there are multiple paths (without loop back edges) to the
2810 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
2813 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
2815 // Initialize the invalid domain.
2816 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
2817 SuccDomain
= CondSet
;
2820 SuccDomain
= SuccDomain
.detect_equalities();
2822 // Check if the maximal number of domain disjunctions was reached.
2823 // In case this happens we will clean up and bail.
2824 if (SuccDomain
.n_basic_set() < MaxDisjunctsInDomain
)
// Too complex: invalidate the SCoP and free the remaining condition sets.
2827 invalidate(COMPLEXITY
, DebugLoc());
2828 while (++u
< ConditionSets
.size())
2829 isl_set_free(ConditionSets
[u
]);
// Collect the union of the (dimension-adjusted) domains of all predecessors
// of @p BB — i.e. all conditions under which BB can be reached. For the SCoP
// entry the universe set is returned. Predecessors already covered via a
// region whose exit is BB are propagated through that region's entry once.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines (incl. the final return, presumably of PredDom) remain;
// verify against the upstream Polly source.
2837 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
2840 // If @p BB is the ScopEntry we are done
2841 if (R
.getEntry() == BB
)
2842 return isl::set::universe(Domain
.get_space());
2844 // The region info of this function.
2845 auto &RI
= *R
.getRegionInfo();
2847 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
2849 // A domain to collect all predecessor domains, thus all conditions under
2850 // which the block is executed. To this end we start with the empty domain.
2851 isl::set PredDom
= isl::set::empty(Domain
.get_space());
2853 // Set of regions of which the entry block domain has been propagated to BB.
2854 // all predecessors inside any of the regions can be skipped.
2855 SmallSet
<Region
*, 8> PropagatedRegions
;
2857 for (auto *PredBB
: predecessors(BB
)) {
// Skip back edges (BB dominating its predecessor).
2859 if (DT
.dominates(BB
, PredBB
))
2862 // If the predecessor is in a region we used for propagation we can skip it.
2863 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
2864 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
2869 // Check if there is a valid region we can use for propagation, thus look
2870 // for a region that contains the predecessor and has @p BB as exit block.
2871 auto *PredR
= RI
.getRegionFor(PredBB
);
2872 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
2875 // If a valid region for propagation was found use the entry of that region
2876 // for propagation, otherwise the PredBB directly.
2877 if (PredR
->getExit() == BB
) {
2878 PredBB
= PredR
->getEntry();
2879 PropagatedRegions
.insert(PredR
);
2882 isl::set PredBBDom
= getDomainConditions(PredBB
);
2883 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
2884 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
2885 PredDom
= PredDom
.unite(PredBBDom
);
// Pull-style counterpart of buildDomainsWithBranchConstraints: intersect each
// node's already-built domain with the union of its predecessors' domains
// (reverse post order guarantees predecessors were visited first), then add
// loop-bound constraints for loop headers via addLoopBoundsToHeaderDomain.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines (incl. the returns) remain; verify against upstream Polly.
2891 bool Scop::propagateDomainConstraints(
2892 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2893 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2894 // Iterate over the region R and propagate the domain constrains from the
2895 // predecessors to the current node. In contrast to the
2896 // buildDomainsWithBranchConstraints function, this one will pull the domain
2897 // information from the predecessors instead of pushing it to the successors.
2898 // Additionally, we assume the domains to be already present in the domain
2899 // map here. However, we iterate again in reverse post order so we know all
2900 // predecessors have been visited before a block or non-affine subregion is
2903 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2904 for (auto *RN
: RTraversal
) {
2905 // Recurse for affine subregions but go on for basic blocks and non-affine
2907 if (RN
->isSubRegion()) {
2908 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2909 if (!isNonAffineSubRegion(SubRegion
)) {
2910 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
2916 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2917 isl::set
&Domain
= DomainMap
[BB
];
2920 // Under the union of all predecessor conditions we can reach this block.
2921 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
2922 Domain
= Domain
.intersect(PredDom
).coalesce();
2923 Domain
= Domain
.align_params(getParamSpace());
// Loop headers inside the SCoP additionally get loop-bound constraints.
2925 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
2926 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
2927 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines remain (e.g. the declaration of `C`, presumably
// `isl::constraint C =`, before original line 2949); verify upstream.
2934 /// Create a map to map from a given iteration to a subsequent iteration.
2936 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
2937 /// is incremented by one and all other dimensions are equal, e.g.,
2938 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
2940 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
2941 static isl::map
createNextIterationMap(isl::space SetSpace
, unsigned Dim
) {
2942 isl::space MapSpace
= SetSpace
.map_from_set();
2943 isl::map NextIterationMap
= isl::map::universe(MapSpace
);
// Equate every dimension (except, presumably, Dim — the guard falls in an
// elided line) between input and output.
2944 for (unsigned u
= 0; u
< NextIterationMap
.dim(isl::dim::in
); u
++)
2947 NextIterationMap
.equate(isl::dim::in
, u
, isl::dim::out
, u
);
// Constraint: in[Dim] + 1 - out[Dim] == 0, i.e. out[Dim] = in[Dim] + 1.
2949 isl::constraint::alloc_equality(isl::local_space(MapSpace
));
2950 C
= C
.set_constant_si(1);
2951 C
= C
.set_coefficient_si(isl::dim::in
, Dim
, 1);
2952 C
= C
.set_coefficient_si(isl::dim::out
, Dim
, -1);
2953 NextIterationMap
= NextIterationMap
.add_constraint(C
);
2954 return NextIterationMap
;
// Constrain the domain of loop header @p L with the loop's bounds: union the
// backedge conditions of all latches, subtract the forward-closure of their
// complement from the header domain, advance it by one iteration via
// createNextIterationMap, and record an INFINITELOOP assumption for the
// potentially unbounded part (unless an <nsw> AddRec already implies bounds).
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines remain; verify against the upstream Polly source.
2957 bool Scop::addLoopBoundsToHeaderDomain(
2958 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2959 int LoopDepth
= getRelativeLoopDepth(L
);
2960 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
2962 BasicBlock
*HeaderBB
= L
->getHeader();
2963 assert(DomainMap
.count(HeaderBB
));
2964 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
2966 isl::map NextIterationMap
=
2967 createNextIterationMap(HeaderBBDom
.get_space(), LoopDepth
);
2969 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
2971 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
2972 L
->getLoopLatches(LatchBlocks
);
// Accumulate, over all latches, the condition under which the backedge to
// the header is taken.
2974 for (BasicBlock
*LatchBB
: LatchBlocks
) {
2975 // If the latch is only reachable via error statements we skip it.
2976 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
2980 isl::set BackedgeCondition
= nullptr;
2982 TerminatorInst
*TI
= LatchBB
->getTerminator();
2983 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
2984 assert(BI
&& "Only branch instructions allowed in loop latches");
2986 if (BI
->isUnconditional())
2987 BackedgeCondition
= LatchBBDom
;
2989 SmallVector
<isl_set
*, 8> ConditionSets
;
// idx selects the successor that IS the header (0 if successor 0 is it).
2990 int idx
= BI
->getSuccessor(0) != HeaderBB
;
2991 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
2992 InvalidDomainMap
, ConditionSets
))
2995 // Free the non back edge condition set as we do not need it.
2996 isl_set_free(ConditionSets
[1 - idx
]);
2998 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
// Latches deeper than the header: project out the extra dimensions.
3001 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3002 assert(LatchLoopDepth
>= LoopDepth
);
3003 BackedgeCondition
= BackedgeCondition
.project_out(
3004 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3005 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
// ForwardMap relates iterations to lexicographically later ones while
// keeping the outer LoopDepth dimensions fixed.
3008 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3009 for (int i
= 0; i
< LoopDepth
; i
++)
3010 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3012 isl::set UnionBackedgeConditionComplement
=
3013 UnionBackedgeCondition
.complement();
3014 UnionBackedgeConditionComplement
=
3015 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3017 UnionBackedgeConditionComplement
=
3018 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3019 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3020 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
// Split into (unbounded, bounded) parts; keep only the bounded part.
3022 auto Parts
= partitionSetParts(HeaderBBDom
, LoopDepth
);
3023 HeaderBBDom
= Parts
.second
;
3025 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3026 // the bounded assumptions to the context as they are already implied by the
3028 if (Affinator
.hasNSWAddRecForLoop(L
))
3031 isl::set UnboundedCtx
= Parts
.first
.params();
3032 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3033 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
// Find the array access (if any) that computes the base pointer of @p MA:
// if the original base address is an instruction with a statement in this
// SCoP, return that statement's array access for it, otherwise null
// (the null-returning paths fall in elided lines; TODO confirm).
// NOTE(review): extraction artifact — embedded line numbers/elided lines.
3037 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3038 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3040 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3041 if (!PointerBaseInst
)
3044 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3048 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
// Return true if the base pointer of @p MA is computed inside the SCoP and
// cannot be hoisted: either its defining access has no hoistable context
// w.r.t. @p Writes, or it is a non-load instruction contained in the SCoP.
// NOTE(review): extraction artifact — embedded line numbers/elided lines
// (incl. the fallthrough return) remain; verify against upstream Polly.
3051 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3052 isl::union_map Writes
) {
3053 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3054 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3057 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3058 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3059 if (!isa
<LoadInst
>(BasePtrInst
))
3060 return contains(BasePtrInst
);
// Build runtime alias checks for this SCoP (if enabled via
// PollyUseRuntimeAliasChecks). On success, bump the aliasing-assumption
// statistic; on failure, invalidate the SCoP (ALIASING) and emit a
// diagnostic explaining the parameter-count limit.
// NOTE(review): extraction artifact — embedded line numbers/elided lines
// (incl. the returns and the DEBUG wrapper around dbgs()) remain.
3065 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3066 if (!PollyUseRuntimeAliasChecks
)
3069 if (buildAliasGroups(AA
)) {
3070 // Aliasing assumptions do not go through addAssumption but we still want to
3071 // collect statistics so we do it here explicitly.
3072 if (MinMaxAliasGroups
.size())
3073 AssumptionsAliasing
++;
3077 // If a problem occurs while building the alias groups we need to delete
3078 // this SCoP and pretend it wasn't valid in the first place. To this end
3079 // we make the assumed context infeasible.
3080 invalidate(ALIASING
, DebugLoc());
3083 dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3084 << " could not be created as the number of parameters involved "
3085 "is too high. The SCoP will be "
3086 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3087 "the maximal number of parameters but be advised that the "
3088 "compile time might increase exponentially.\n\n");
// Group all array accesses of the SCoP by may-alias sets (via an
// AliasSetTracker fed from each access's pointer), and record which arrays
// are written. Returns (alias groups, set of written arrays). Scalar
// accesses and statements with empty domains are skipped.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines remain (e.g. the AST.add(...) call, the declaration of AG, and the
// loop over the alias set's pointer records `PR`); verify upstream.
3092 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3093 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3094 AliasSetTracker
AST(AA
);
3096 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3097 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3098 for (ScopStmt
&Stmt
: *this) {
3100 isl::set StmtDomain
= Stmt
.getDomain();
3101 bool StmtDomainEmpty
= StmtDomain
.is_empty();
3103 // Statements with an empty domain will never be executed.
3104 if (StmtDomainEmpty
)
3107 for (MemoryAccess
*MA
: Stmt
) {
3108 if (MA
->isScalarKind())
3111 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3112 MemAccInst
Acc(MA
->getAccessInstruction());
// For memcpy/memmove reads, key the map on the transfer's source pointer.
3113 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3114 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3116 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
// Turn each interesting (may-alias, non-forwarding) alias set into a group.
3121 AliasGroupVectorTy AliasGroups
;
3122 for (AliasSet
&AS
: AST
) {
3123 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3127 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3130 AliasGroups
.push_back(std::move(AG
));
3133 return std::make_tuple(AliasGroups
, HasWriteAccess
);
// Split each alias group so that all accesses in a group have overlapping
// execution domains: accesses whose domain is disjoint from the running
// union are moved into a new group (appended for later processing).
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines remain (e.g. the declaration of NewAG and the ++AGI advance).
3136 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3137 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3139 AliasGroupTy
&AG
= AliasGroups
[u
];
3140 AliasGroupTy::iterator AGI
= AG
.begin();
3141 isl::set AGDomain
= getAccessDomain(*AGI
);
3142 while (AGI
!= AG
.end()) {
3143 MemoryAccess
*MA
= *AGI
;
3144 isl::set MADomain
= getAccessDomain(MA
);
3145 if (AGDomain
.is_disjoint(MADomain
)) {
3146 NewAG
.push_back(MA
);
3147 AGI
= AG
.erase(AGI
);
3149 AGDomain
= AGDomain
.unite(MADomain
);
// Only keep split-off groups that still need a check (two or more accesses).
3153 if (NewAG
.size() > 1)
3154 AliasGroups
.push_back(std::move(NewAG
));
// Build all alias groups and the per-group min/max runtime checks. Bails out
// (via invalidate(COMPLEXITY)) when isl hits its operation quota, and stops
// early when the runtime context already became infeasible.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (incl. returns and the handling of `Valid`) remain; verify upstream.
3158 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3159 // To create sound alias checks we perform the following steps:
3160 // o) We partition each group into read only and non read only accesses.
3161 // o) For each group with more than one base pointer we then compute minimal
3162 // and maximal accesses to each array of a group in read only and non
3163 // read only partitions separately.
3164 AliasGroupVectorTy AliasGroups
;
3165 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3167 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3169 splitAliasGroupsByDomain(AliasGroups
);
3171 for (AliasGroupTy
&AG
: AliasGroups
) {
3172 if (!hasFeasibleRuntimeContext())
// Bound the isl work spent per group by OptComputeOut operations.
3176 IslMaxOperationsGuard
MaxOpGuard(getIslCtx().get(), OptComputeOut
);
3177 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3181 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota
) {
3182 invalidate(COMPLEXITY
, DebugLoc());
// Build the min/max runtime alias check for one alias group: partition its
// accesses into read-only and read-write, bail early when no check is
// needed, refuse non-affine accesses (invalidate ALIASING), register
// required invariant loads for base pointers, and compute min/max accesses
// for both partitions into a new MinMaxAliasGroups entry.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (incl. returns and the Valid checks after calculateMinMaxAccess)
// remain; verify against the upstream Polly source.
3190 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3191 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3192 AliasGroupTy ReadOnlyAccesses
;
3193 AliasGroupTy ReadWriteAccesses
;
3194 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3195 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3197 if (AliasGroup
.size() < 2)
3200 for (MemoryAccess
*Access
: AliasGroup
) {
3201 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3202 Access
->getAccessInstruction())
3203 << "Possibly aliasing pointer, use restrict keyword.");
3204 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3205 if (HasWriteAccess
.count(Array
)) {
3206 ReadWriteArrays
.insert(Array
);
3207 ReadWriteAccesses
.push_back(Access
);
3209 ReadOnlyArrays
.insert(Array
);
3210 ReadOnlyAccesses
.push_back(Access
);
3214 // If there are no read-only pointers, and less than two read-write pointers,
3215 // no alias check is needed.
3216 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3219 // If there is no read-write pointer, no alias check is needed.
3220 if (ReadWriteArrays
.empty())
3223 // For non-affine accesses, no alias check can be generated as we cannot
3224 // compute a sufficiently tight lower and upper bound: bail out.
3225 for (MemoryAccess
*MA
: AliasGroup
) {
3226 if (!MA
->isAffine()) {
3227 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3228 MA
->getAccessInstruction()->getParent());
3233 // Ensure that for all memory accesses for which we generate alias checks,
3234 // their base pointers are available.
3235 for (MemoryAccess
*MA
: AliasGroup
) {
3236 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3237 addRequiredInvariantLoad(
3238 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3241 MinMaxAliasGroups
.emplace_back();
3242 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3243 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3244 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3249 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3254 // Bail out if the number of values we need to compare is too large.
3255 // This is important as the number of comparisons grows quadratically with
3256 // the number of values we need to compare.
3257 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3258 RunTimeChecksMaxArraysPerGroup
)
3262 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines remain (e.g. the `while (L)` header around the AllContained scan).
3270 /// Get the smallest loop that contains @p S but is not in @p S.
3271 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3272 // Start with the smallest loop containing the entry and expand that
3273 // loop until it contains all blocks in the region. If there is a loop
3274 // containing all blocks in the region check if it is itself contained
3275 // and if so take the parent loop as it will be the smallest containing
3276 // the region but not contained by it.
3277 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3279 bool AllContained
= true;
3280 for (auto *BB
: S
.blocks())
3281 AllContained
&= L
->contains(BB
);
3284 L
= L
->getParentLoop();
3287 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
// Per-function SCoP numbering: NextScopID is the next ID to hand out and
// CurrentFunc tracks the function it applies to; getNextID resets the
// counter (presumably — the reset falls in an elided line; TODO confirm)
// whenever the enclosing function name changes, then returns the next ID.
3290 int Scop::NextScopID
= 0;
3292 std::string
Scop::CurrentFunc
;
3294 int Scop::getNextID(std::string ParentFunc
) {
3295 if (ParentFunc
!= CurrentFunc
) {
3296 CurrentFunc
= ParentFunc
;
3299 return NextScopID
++;
// Scop constructor: allocates an owning isl context, caches analysis
// references (SE/DT/DC/ORE), derives HasSingleExitEdge from the region
// having a single exiting block, builds the SCEV affinator, and assigns a
// per-function sequential ID. Optionally makes isl abort on error
// (IslOnErrorAbort) to ease debugging.
3302 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3303 DominatorTree
&DT
, ScopDetection::DetectionContext
&DC
,
3304 OptimizationRemarkEmitter
&ORE
)
3305 : IslCtx(isl_ctx_alloc(), isl_ctx_free
), SE(&ScalarEvolution
), DT(&DT
),
3306 R(R
), name(None
), HasSingleExitEdge(R
.getExitingBlock()), DC(DC
),
3307 ORE(ORE
), Affinator(this, LI
),
3308 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3309 if (IslOnErrorAbort
)
3310 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT
);
// Defaulted out-of-line destructor — presumably kept in this TU so member
// types may stay incomplete in the header; TODO confirm against the header.
3314 Scop::~Scop() = default;
// For each multi-dimensional array, detect a constant stride/modulo pattern
// in the accessed elements (via the affine hull of each dimension) and fold
// that constant into the rightmost (innermost) dimension's size, rewriting
// all access relations of the array through the built Transform map.
// Works on raw isl_* objects with manual reference counting — note the
// paired *_free calls on every early-exit path.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines remain (e.g. the DimOnly declaration, the ValInt
// declarations, and several continues/frees); verify against upstream.
3316 void Scop::foldSizeConstantsToRight() {
3317 isl_union_set
*Accessed
= isl_union_map_range(getAccesses().release());
3319 for (auto Array
: arrays()) {
3320 if (Array
->getNumberOfDimensions() <= 1)
3323 isl_space
*Space
= Array
->getSpace().release();
3325 Space
= isl_space_align_params(Space
, isl_union_set_get_space(Accessed
));
3327 if (!isl_union_set_contains(Accessed
, Space
)) {
3328 isl_space_free(Space
);
3332 isl_set
*Elements
= isl_union_set_extract_set(Accessed
, Space
);
3334 isl_map
*Transform
=
3335 isl_map_universe(isl_space_map_from_set(Array
->getSpace().release()));
3337 std::vector
<int> Int
;
3339 int Dims
= isl_set_dim(Elements
, isl_dim_set
);
// Per dimension: isolate it, take its affine hull, and look for either a
// single-divisor stride or a dimension fixed to zero.
3340 for (int i
= 0; i
< Dims
; i
++) {
3342 isl_set_project_out(isl_set_copy(Elements
), isl_dim_set
, 0, i
);
3343 DimOnly
= isl_set_project_out(DimOnly
, isl_dim_set
, 1, Dims
- i
- 1);
3344 DimOnly
= isl_set_lower_bound_si(DimOnly
, isl_dim_set
, 0, 0);
3346 isl_basic_set
*DimHull
= isl_set_affine_hull(DimOnly
);
3348 if (i
== Dims
- 1) {
3350 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3351 isl_basic_set_free(DimHull
);
3355 if (isl_basic_set_dim(DimHull
, isl_dim_div
) == 1) {
3356 isl_aff
*Diff
= isl_basic_set_get_div(DimHull
, 0);
3357 isl_val
*Val
= isl_aff_get_denominator_val(Diff
);
3362 if (isl_val_is_int(Val
)) {
3363 auto ValAPInt
= APIntFromVal(Val
);
3364 if (ValAPInt
.isSignedIntN(32))
3365 ValInt
= ValAPInt
.getSExtValue();
3370 Int
.push_back(ValInt
);
// Constraint ValInt * out[i] - in[i] == 0, i.e. divide dimension i by the
// detected constant in the transformed space.
3372 isl_constraint
*C
= isl_constraint_alloc_equality(
3373 isl_local_space_from_space(isl_map_get_space(Transform
)));
3374 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, ValInt
);
3375 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, -1);
3376 Transform
= isl_map_add_constraint(Transform
, C
);
3377 isl_basic_set_free(DimHull
);
3381 isl_basic_set
*ZeroSet
= isl_basic_set_copy(DimHull
);
3382 ZeroSet
= isl_basic_set_fix_si(ZeroSet
, isl_dim_set
, 0, 0);
3385 if (isl_basic_set_is_equal(ZeroSet
, DimHull
)) {
3389 Int
.push_back(ValInt
);
3390 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3391 isl_basic_set_free(DimHull
);
3392 isl_basic_set_free(ZeroSet
);
// Only apply the transform if it covers every accessed element.
3395 isl_set
*MappedElements
= isl_map_domain(isl_map_copy(Transform
));
3397 if (!isl_set_is_subset(Elements
, MappedElements
)) {
3398 isl_set_free(Elements
);
3399 isl_set_free(MappedElements
);
3400 isl_map_free(Transform
);
3404 isl_set_free(MappedElements
);
3406 bool CanFold
= true;
// The fold is only valid when all middle dimensions share one constant.
3411 unsigned NumDims
= Array
->getNumberOfDimensions();
3412 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3413 if (Int
[0] != Int
[i
] && Int
[i
])
3417 isl_set_free(Elements
);
3418 isl_map_free(Transform
);
// Rewrite all access relations of this array through Transform.
3422 for (auto &Access
: AccessFunctions
)
3423 if (Access
->getScopArrayInfo() == Array
)
3424 Access
->setAccessRelation(Access
->getAccessRelation().apply_range(
3425 isl::manage_copy(Transform
)));
3427 isl_map_free(Transform
);
// Scale the innermost dimension size by the folded constant.
3429 std::vector
<const SCEV
*> Sizes
;
3430 for (unsigned i
= 0; i
< NumDims
; i
++) {
3431 auto Size
= Array
->getDimensionSize(i
);
3433 if (i
== NumDims
- 1)
3434 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3435 Sizes
.push_back(Size
);
3438 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3440 isl_set_free(Elements
);
3442 isl_union_set_free(Accessed
);
// For every memory access that carries a Fortran array descriptor, attach
// that descriptor (FAD) to the access's latest ScopArrayInfo.
// NOTE(review): extraction artifact — embedded line numbers and an elided
// line (presumably the `if (!FAD) continue;` guard) remain; verify upstream.
3445 void Scop::markFortranArrays() {
3446 for (ScopStmt
&Stmt
: Stmts
) {
3447 for (MemoryAccess
*MemAcc
: Stmt
) {
3448 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3452 // TODO: const_cast-ing to edit
3453 ScopArrayInfo
*SAI
=
3454 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3455 assert(SAI
&& "memory access into a Fortran array does not "
3456 "have an associated ScopArrayInfo");
3457 SAI
->applyAndSetFAD(FAD
);
// Run the access-finalization pipeline, in dependency order: normalize
// dimensionality, fold constant sizes, fold access relations, add
// no-out-of-bounds assumptions, then mark Fortran arrays.
3462 void Scop::finalizeAccesses() {
3463 updateAccessDimensionality();
3464 foldSizeConstantsToRight();
3465 foldAccessRelations();
3466 assumeNoOutOfBounds();
3467 markFortranArrays();
// First pass: for single-dimensional arrays, shrink the element size until
// it divides every subscript expression (the shrink step itself falls in an
// elided line; TODO confirm), then update the array's element type.
// Second pass: let every access update its own dimensionality.
// NOTE(review): extraction artifact — embedded line numbers/elided lines.
3470 void Scop::updateAccessDimensionality() {
3471 // Check all array accesses for each base pointer and find a (virtual) element
3472 // size for the base pointer that divides all access functions.
3473 for (ScopStmt
&Stmt
: *this)
3474 for (MemoryAccess
*Access
: Stmt
) {
3475 if (!Access
->isArrayKind())
3477 ScopArrayInfo
*Array
=
3478 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3480 if (Array
->getNumberOfDimensions() != 1)
3482 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3483 const SCEV
*Subscript
= Access
->getSubscript(0);
3484 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3486 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3487 Array
->updateElementType(Ty
);
3490 for (auto &Stmt
: *this)
3491 for (auto &Access
: Stmt
)
3492 Access
->updateDimensionality();
// Fold the access relation of every memory access in the SCoP.
3495 void Scop::foldAccessRelations() {
3496 for (auto &Stmt
: *this)
3497 for (auto &Access
: Stmt
)
3498 Access
->foldAccessRelation();
// Add, for every memory access in the SCoP, the assumption that it does not
// access memory out of bounds.
3501 void Scop::assumeNoOutOfBounds() {
3502 for (auto &Stmt
: *this)
3503 for (auto &Access
: Stmt
)
3504 Access
->assumeNoOutOfBound();
// Erase all lookup-table entries (InstStmtMap, StmtMap) that point at
// @p Stmt. Region statements additionally clear the instruction map entries
// of every block in the region (except the entry block, already covered by
// the statement's own instruction list). The else-branch (non-region
// statements) erases the statement from its block's StmtMap vector.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (incl. the branch structure between the two halves) remain.
3507 void Scop::removeFromStmtMap(ScopStmt
&Stmt
) {
3508 for (Instruction
*Inst
: Stmt
.getInstructions())
3509 InstStmtMap
.erase(Inst
);
3511 if (Stmt
.isRegionStmt()) {
3512 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks()) {
3514 // Skip entry basic block, as its instructions are already deleted as
3515 // part of the statement's instruction list.
3516 if (BB
== Stmt
.getEntryBlock())
3518 for (Instruction
&Inst
: *BB
)
3519 InstStmtMap
.erase(&Inst
);
// Erase-remove the statement pointer from its basic block's entry.
3522 auto StmtMapIt
= StmtMap
.find(Stmt
.getBasicBlock());
3523 if (StmtMapIt
!= StmtMap
.end())
3524 StmtMapIt
->second
.erase(std::remove(StmtMapIt
->second
.begin(),
3525 StmtMapIt
->second
.end(), &Stmt
),
3526 StmtMapIt
->second
.end());
3527 for (Instruction
*Inst
: Stmt
.getInstructions())
3528 InstStmtMap
.erase(Inst
);
// Remove every statement for which @p ShouldDelete returns true: first drop
// all of its memory accesses (iterating over a copied list, since removal
// invalidates iterators), then unhook it from the lookup maps and erase it
// from the statement list. @p AfterHoisting is forwarded to the per-access
// removal (it falls after invariant load hoisting when true).
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (incl. the ++StmtIt/continue in the keep-branch) remain.
3532 void Scop::removeStmts(std::function
<bool(ScopStmt
&)> ShouldDelete
,
3533 bool AfterHoisting
) {
3534 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3535 if (!ShouldDelete(*StmtIt
)) {
3540 // Start with removing all of the statement's accesses including erasing it
3541 // from all maps that are pointing to them.
3542 // Make a temporary copy because removing MAs invalidates the iterator.
3543 SmallVector
<MemoryAccess
*, 16> MAList(StmtIt
->begin(), StmtIt
->end());
3544 for (MemoryAccess
*MA
: MAList
)
3545 StmtIt
->removeSingleMemoryAccess(MA
, AfterHoisting
);
3547 removeFromStmtMap(*StmtIt
);
3548 StmtIt
= Stmts
.erase(StmtIt
);
// Delete every statement whose entry block has no domain in DomainMap
// (i.e. statements proven unreachable during domain construction).
3552 void Scop::removeStmtNotInDomainMap() {
3553 auto ShouldDelete
= [this](ScopStmt
&Stmt
) -> bool {
3554 return !this->DomainMap
.lookup(Stmt
.getEntryBlock());
3556 removeStmts(ShouldDelete
, false);
// Remove trivially removable statements: empty ones, and — only after
// invariant load hoisting (@p AfterHoisting) — statements whose remaining
// accesses are all reads. Statements containing debug-function calls are
// never removed.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (incl. the per-access OnlyRead update and the lambda's returns)
// remain; verify against the upstream Polly source.
3559 void Scop::simplifySCoP(bool AfterHoisting
) {
3560 auto ShouldDelete
= [AfterHoisting
](ScopStmt
&Stmt
) -> bool {
3561 // Never delete statements that contain calls to debug functions.
3562 if (hasDebugCall(&Stmt
))
3565 bool RemoveStmt
= Stmt
.isEmpty();
3567 // Remove read only statements only after invariant load hoisting.
3568 if (!RemoveStmt
&& AfterHoisting
) {
3569 bool OnlyRead
= true;
3570 for (MemoryAccess
*MA
: Stmt
) {
3578 RemoveStmt
= OnlyRead
;
3583 removeStmts(ShouldDelete
, AfterHoisting
);
// Find the invariant-load equivalence class containing @p Val: map the load
// through InvEquivClassVMap to its class representative, then search the
// classes for one with the same pointer SCEV and type that actually lists
// an access for Val. Null-returning paths fall in elided lines.
// NOTE(review): extraction artifact — embedded line numbers/elided lines.
3586 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3587 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
3591 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3592 LInst
= cast
<LoadInst
>(Rep
);
3594 Type
*Ty
= LInst
->getType();
3595 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3596 for (auto &IAClass
: InvariantEquivClasses
) {
3597 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3600 auto &MAs
= IAClass
.InvariantAccesses
;
3601 for (auto *MA
: MAs
)
3602 if (MA
->getAccessInstruction() == Val
)
// Return whether @p maybeParam is one of @p F's formal arguments (compared
// by address). The return statements fall in elided lines.
// NOTE(review): extraction artifact — embedded line numbers/elided lines.
3609 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3610 for (const llvm::Argument
&Arg
: F
.args())
3611 if (&Arg
== maybeParam
)
// Decide whether the load behind @p MA may be hoisted unconditionally
// (speculatively): allowed for dereferenceable, aligned pointers — or any
// function parameter under PollyAllowDereferenceOfAllFunctionParams — as
// long as the location is not possibly overwritten, and either the
// statement is modeled precisely or all subscripts are constants.
// NOTE(review): extraction artifact — embedded line numbers and elided
// lines (the actual return statements) remain; verify upstream.
3617 bool Scop::canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3618 bool MAInvalidCtxIsEmpty
,
3619 bool NonHoistableCtxIsEmpty
) {
3620 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3621 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3622 if (PollyAllowDereferenceOfAllFunctionParams
&&
3623 isAParameter(LInst
->getPointerOperand(), getFunction()))
3626 // TODO: We can provide more information for better but more expensive
3628 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3629 LInst
->getAlignment(), DL
))
3632 // If the location might be overwritten we do not hoist it unconditionally.
3634 // TODO: This is probably too conservative.
3635 if (!NonHoistableCtxIsEmpty
)
3638 // If a dereferenceable load is in a statement that is modeled precisely we
3640 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3643 // Even if the statement is not modeled precisely we can hoist the load if it
3644 // does not involve any parameters that might have been specialized by the
3645 // statement domain.
3646 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3647 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
// Register the invariant (hoistable) loads of @p Stmt: compute the execution
// context (statement domain minus invalid context, with load-dependent
// parameters projected out to avoid cyclic pre-load ordering constraints),
// then consolidate each access into an existing invariant-load equivalence
// class with matching pointer SCEV / type / access range, or start a new
// class. Bails out via invalidate(COMPLEXITY) when the context grows past
// MaxDisjunctsInDomain disjuncts.
// NOTE(review): extraction artifact — embedded original line numbers and
// elided lines (e.g. the declaration of MACtx and parts of the
// consolidation control flow) remain; verify against upstream Polly.
3652 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3656 isl::set StmtInvalidCtx
= Stmt
.getInvalidContext();
3657 bool StmtInvalidCtxIsEmpty
= StmtInvalidCtx
.is_empty();
3659 // Get the context under which the statement is executed but remove the error
3660 // context under which this statement is reached.
3661 isl::set DomainCtx
= Stmt
.getDomain().params();
3662 DomainCtx
= DomainCtx
.subtract(StmtInvalidCtx
);
3664 if (DomainCtx
.n_basic_set() >= MaxDisjunctsInDomain
) {
3665 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3666 invalidate(COMPLEXITY
, AccInst
->getDebugLoc(), AccInst
->getParent());
3670 // Project out all parameters that relate to loads in the statement. Otherwise
3671 // we could have cyclic dependences on the constraints under which the
3672 // hoisted loads are executed and we could not determine an order in which to
3673 // pre-load them. This happens because not only lower bounds are part of the
3674 // domain but also upper bounds.
3675 for (auto &InvMA
: InvMAs
) {
3676 auto *MA
= InvMA
.MA
;
3677 Instruction
*AccInst
= MA
->getAccessInstruction();
3678 if (SE
->isSCEVable(AccInst
->getType())) {
3679 SetVector
<Value
*> Values
;
3680 for (const SCEV
*Parameter
: Parameters
) {
3682 findValues(Parameter
, *SE
, Values
);
3683 if (!Values
.count(AccInst
))
3686 if (isl::id ParamId
= getIdForParam(Parameter
)) {
3687 int Dim
= DomainCtx
.find_dim_by_id(isl::dim::param
, ParamId
);
3689 DomainCtx
= DomainCtx
.eliminate(isl::dim::param
, Dim
, 1);
// Second pass: consolidate each invariant access into an equivalence class.
3695 for (auto &InvMA
: InvMAs
) {
3696 auto *MA
= InvMA
.MA
;
3697 isl::set NHCtx
= InvMA
.NonHoistableCtx
;
3699 // Check for another invariant access that accesses the same location as
3700 // MA and if found consolidate them. Otherwise create a new equivalence
3701 // class at the end of InvariantEquivClasses.
3702 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3703 Type
*Ty
= LInst
->getType();
3704 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3706 isl::set MAInvalidCtx
= MA
->getInvalidContext();
3707 bool NonHoistableCtxIsEmpty
= NHCtx
.is_empty();
3708 bool MAInvalidCtxIsEmpty
= MAInvalidCtx
.is_empty();
3711 // Check if we know that this pointer can be speculatively accessed.
3712 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3713 NonHoistableCtxIsEmpty
)) {
3714 MACtx
= isl::set::universe(DomainCtx
.get_space());
3717 MACtx
= MACtx
.subtract(MAInvalidCtx
.unite(NHCtx
));
3718 MACtx
= MACtx
.gist_params(getContext());
3721 bool Consolidated
= false;
3722 for (auto &IAClass
: InvariantEquivClasses
) {
3723 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3726 // If the pointer and the type is equal check if the access function wrt.
3727 // to the domain is equal too. It can happen that the domain fixes
3728 // parameter values and these can be different for distinct part of the
3729 // SCoP. If this happens we cannot consolidate the loads but need to
3730 // create a new invariant load equivalence class.
3731 auto &MAs
= IAClass
.InvariantAccesses
;
3733 auto *LastMA
= MAs
.front();
3735 isl::set AR
= MA
->getAccessRelation().range();
3736 isl::set LastAR
= LastMA
->getAccessRelation().range();
3737 bool SameAR
= AR
.is_equal(LastAR
);
3743 // Add MA to the list of accesses that are in this class.
3746 Consolidated
= true;
3748 // Unify the execution context of the class and this statement.
3749 isl::set IAClassDomainCtx
= IAClass
.ExecutionContext
;
3750 if (IAClassDomainCtx
)
3751 IAClassDomainCtx
= IAClassDomainCtx
.unite(MACtx
).coalesce();
3753 IAClassDomainCtx
= MACtx
;
3754 IAClass
.ExecutionContext
= IAClassDomainCtx
;
3761 // If we did not consolidate MA, thus did not find an equivalence class
3762 // for it, we create a new one.
3763 InvariantEquivClasses
.emplace_back(
3764 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList
{MA
}, MACtx
, Ty
});
3768 /// Check if an access range is too complex.
3770 /// An access range is too complex, if it contains either many disjuncts or
3771 /// very complex expressions. As a simple heuristic, we assume if a set to
3772 /// be too complex if the sum of existentially quantified dimensions and
3773 /// set dimensions is larger than a threshold. This reliably detects both
3774 /// sets with many disjuncts as well as sets with many divisions as they
3777 /// @param AccessRange The range to check for complexity.
3779 /// @returns True if the access range is too complex.
3780 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
3781 unsigned NumTotalDims
= 0;
3783 auto CountDimensions
= [&NumTotalDims
](isl::basic_set BSet
) -> isl::stat
{
3784 NumTotalDims
+= BSet
.dim(isl::dim::div
);
3785 NumTotalDims
+= BSet
.dim(isl::dim::set
);
3786 return isl::stat::ok
;
3789 AccessRange
.foreach_basic_set(CountDimensions
);
3791 if (NumTotalDims
> MaxDimensionsInAccessRange
)
3797 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
3798 // TODO: Loads that are not loop carried, hence are in a statement with
3799 // zero iterators, are by construction invariant, though we
3800 // currently "hoist" them anyway. This is necessary because we allow
3801 // them to be treated as parameters (e.g., in conditions) and our code
3802 // generation would otherwise use the old value.
3804 auto &Stmt
= *Access
->getStatement();
3805 BasicBlock
*BB
= Stmt
.getEntryBlock();
3807 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
3808 Access
->isMemoryIntrinsic())
3811 // Skip accesses that have an invariant base pointer which is defined but
3812 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3813 // returns a pointer that is used as a base address. However, as we want
3814 // to hoist indirect pointers, we allow the base pointer to be defined in
3815 // the region if it is also a memory access. Each ScopArrayInfo object
3816 // that has a base pointer origin has a base pointer that is loaded and
3817 // that it is invariant, thus it will be hoisted too. However, if there is
3818 // no base pointer origin we check that the base pointer is defined
3819 // outside the region.
3820 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
3821 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
3824 isl::map AccessRelation
= Access
->getAccessRelation();
3825 assert(!AccessRelation
.is_empty());
3827 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
3830 AccessRelation
= AccessRelation
.intersect_domain(Stmt
.getDomain());
3831 isl::set SafeToLoad
;
3833 auto &DL
= getFunction().getParent()->getDataLayout();
3834 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
3836 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
3837 } else if (BB
!= LI
->getParent()) {
3838 // Skip accesses in non-affine subregions as they might not be executed
3839 // under the same condition as the entry of the non-affine subregion.
3842 SafeToLoad
= AccessRelation
.range();
3845 if (isAccessRangeTooComplex(AccessRelation
.range()))
3848 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
3849 isl::set WrittenCtx
= Written
.params();
3850 bool IsWritten
= !WrittenCtx
.is_empty();
3855 WrittenCtx
= WrittenCtx
.remove_divs();
3856 bool TooComplex
= WrittenCtx
.n_basic_set() >= MaxDisjunctsInDomain
;
3857 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
3860 addAssumption(INVARIANTLOAD
, WrittenCtx
, LI
->getDebugLoc(), AS_RESTRICTION
,
3865 void Scop::verifyInvariantLoads() {
3866 auto &RIL
= getRequiredInvariantLoads();
3867 for (LoadInst
*LI
: RIL
) {
3868 assert(LI
&& contains(LI
));
3869 // If there exists a statement in the scop which has a memory access for
3870 // @p LI, then mark this scop as infeasible for optimization.
3871 for (ScopStmt
&Stmt
: Stmts
)
3872 if (Stmt
.getArrayAccessOrNULLFor(LI
)) {
3873 invalidate(INVARIANTLOAD
, LI
->getDebugLoc(), LI
->getParent());
3879 void Scop::hoistInvariantLoads() {
3880 if (!PollyInvariantLoadHoisting
)
3883 isl::union_map Writes
= getWrites();
3884 for (ScopStmt
&Stmt
: *this) {
3885 InvariantAccessesTy InvariantAccesses
;
3887 for (MemoryAccess
*Access
: Stmt
)
3888 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
3889 InvariantAccesses
.push_back({Access
, NHCtx
});
3891 // Transfer the memory access from the statement to the SCoP.
3892 for (auto InvMA
: InvariantAccesses
)
3893 Stmt
.removeMemoryAccess(InvMA
.MA
);
3894 addInvariantLoads(Stmt
, InvariantAccesses
);
3898 /// Find the canonical scop array info object for a set of invariant load
3899 /// hoisted loads. The canonical array is the one that corresponds to the
3900 /// first load in the list of accesses which is used as base pointer of a
3902 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
3903 MemoryAccessList
&Accesses
) {
3904 for (MemoryAccess
*Access
: Accesses
) {
3905 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
3906 Access
->getAccessInstruction(), MemoryKind::Array
);
3908 return CanonicalArray
;
3913 /// Check if @p Array severs as base array in an invariant load.
3914 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
3915 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
3916 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
3917 if (Access2
->getScopArrayInfo() == Array
)
3922 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3923 /// with a reference to @p New.
3924 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
3925 const ScopArrayInfo
*New
) {
3926 for (ScopStmt
&Stmt
: *S
)
3927 for (MemoryAccess
*Access
: Stmt
) {
3928 if (Access
->getLatestScopArrayInfo() != Old
)
3931 isl::id Id
= New
->getBasePtrId();
3932 isl::map Map
= Access
->getAccessRelation();
3933 Map
= Map
.set_tuple_id(isl::dim::out
, Id
);
3934 Access
->setAccessRelation(Map
);
3938 void Scop::canonicalizeDynamicBasePtrs() {
3939 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
3940 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
3942 const ScopArrayInfo
*CanonicalBasePtrSAI
=
3943 findCanonicalArray(this, BasePtrAccesses
);
3945 if (!CanonicalBasePtrSAI
)
3948 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
3949 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
3950 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
3951 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
3952 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
3955 // we currently do not canonicalize arrays where some accesses are
3956 // hoisted as invariant loads. If we would, we need to update the access
3957 // function of the invariant loads as well. However, as this is not a
3958 // very common situation, we leave this for now to avoid further
3959 // complexity increases.
3960 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
3963 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
3968 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
3969 ArrayRef
<const SCEV
*> Sizes
,
3971 const char *BaseName
) {
3972 assert((BasePtr
|| BaseName
) &&
3973 "BasePtr and BaseName can not be nullptr at the same time.");
3974 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
3975 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
3976 : ScopArrayNameMap
[BaseName
];
3978 auto &DL
= getFunction().getParent()->getDataLayout();
3979 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
3980 DL
, this, BaseName
));
3981 ScopArrayInfoSet
.insert(SAI
.get());
3983 SAI
->updateElementType(ElementType
);
3984 // In case of mismatching array sizes, we bail out by setting the run-time
3985 // context to false.
3986 if (!SAI
->updateSizes(Sizes
))
3987 invalidate(DELINEARIZATION
, DebugLoc());
3992 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
3993 const std::string
&BaseName
,
3994 const std::vector
<unsigned> &Sizes
) {
3995 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
3996 std::vector
<const SCEV
*> SCEVSizes
;
3998 for (auto size
: Sizes
)
4000 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
4002 SCEVSizes
.push_back(nullptr);
4004 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
4005 MemoryKind::Array
, BaseName
.c_str());
4009 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
4011 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
4015 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
4016 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
4017 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
4021 std::string
Scop::getContextStr() const { return getContext().to_str(); }
4023 std::string
Scop::getAssumedContextStr() const {
4024 assert(AssumedContext
&& "Assumed context not yet built");
4025 return AssumedContext
.to_str();
4028 std::string
Scop::getInvalidContextStr() const {
4029 return InvalidContext
.to_str();
4032 std::string
Scop::getNameStr() const {
4033 std::string ExitName
, EntryName
;
4034 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4035 return EntryName
+ "---" + ExitName
;
4038 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4039 std::string ExitName
, EntryName
;
4040 raw_string_ostream
ExitStr(ExitName
);
4041 raw_string_ostream
EntryStr(EntryName
);
4043 R
.getEntry()->printAsOperand(EntryStr
, false);
4047 R
.getExit()->printAsOperand(ExitStr
, false);
4050 ExitName
= "FunctionExit";
4052 return std::make_pair(EntryName
, ExitName
);
4055 isl::set
Scop::getContext() const { return Context
; }
4056 isl::space
Scop::getParamSpace() const { return getContext().get_space(); }
4058 isl::space
Scop::getFullParamSpace() const {
4059 std::vector
<isl::id
> FortranIDs
;
4060 FortranIDs
= getFortranArrayIds(arrays());
4062 isl::space Space
= isl::space::params_alloc(
4063 getIslCtx(), ParameterIds
.size() + FortranIDs
.size());
4066 for (const SCEV
*Parameter
: Parameters
) {
4067 isl::id Id
= getIdForParam(Parameter
);
4068 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4071 for (isl::id Id
: FortranIDs
)
4072 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4077 isl::set
Scop::getAssumedContext() const {
4078 assert(AssumedContext
&& "Assumed context not yet built");
4079 return AssumedContext
;
4082 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4083 if (PollyProcessUnprofitable
)
4089 unsigned OptimizableStmtsOrLoops
= 0;
4090 for (auto &Stmt
: *this) {
4091 if (Stmt
.getNumIterators() == 0)
4094 bool ContainsArrayAccs
= false;
4095 bool ContainsScalarAccs
= false;
4096 for (auto *MA
: Stmt
) {
4099 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4100 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4103 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4104 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4107 return OptimizableStmtsOrLoops
> 1;
4110 bool Scop::hasFeasibleRuntimeContext() const {
4111 auto PositiveContext
= getAssumedContext();
4112 auto NegativeContext
= getInvalidContext();
4113 PositiveContext
= addNonEmptyDomainConstraints(PositiveContext
);
4114 // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
4115 if (!PositiveContext
)
4118 bool IsFeasible
= !(PositiveContext
.is_empty() ||
4119 PositiveContext
.is_subset(NegativeContext
));
4123 auto DomainContext
= getDomains().params();
4124 IsFeasible
= !DomainContext
.is_subset(NegativeContext
);
4125 IsFeasible
&= !Context
.is_subset(NegativeContext
);
4130 static std::string
toString(AssumptionKind Kind
) {
4133 return "No-aliasing";
4137 return "No-overflows";
4139 return "Signed-unsigned";
4141 return "Low complexity";
4143 return "Profitable";
4147 return "Finite loop";
4149 return "Invariant load";
4150 case DELINEARIZATION
:
4151 return "Delinearization";
4153 llvm_unreachable("Unknown AssumptionKind!");
4156 bool Scop::isEffectiveAssumption(isl::set Set
, AssumptionSign Sign
) {
4157 if (Sign
== AS_ASSUMPTION
) {
4158 if (Context
.is_subset(Set
))
4161 if (AssumedContext
.is_subset(Set
))
4164 if (Set
.is_disjoint(Context
))
4167 if (Set
.is_subset(InvalidContext
))
4173 bool Scop::trackAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4174 AssumptionSign Sign
, BasicBlock
*BB
) {
4175 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4178 // Do never emit trivial assumptions as they only clutter the output.
4179 if (!PollyRemarksMinimal
) {
4181 if (Sign
== AS_ASSUMPTION
)
4182 Univ
= isl::set::universe(Set
.get_space());
4184 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& Set
.is_empty()) ||
4185 (Sign
== AS_ASSUMPTION
&& Univ
.is_equal(Set
));
4193 AssumptionsAliasing
++;
4196 AssumptionsInbounds
++;
4199 AssumptionsWrapping
++;
4202 AssumptionsUnsigned
++;
4205 AssumptionsComplexity
++;
4208 AssumptionsUnprofitable
++;
4211 AssumptionsErrorBlock
++;
4214 AssumptionsInfiniteLoop
++;
4217 AssumptionsInvariantLoad
++;
4219 case DELINEARIZATION
:
4220 AssumptionsDelinearization
++;
4224 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4225 std::string Msg
= toString(Kind
) + Suffix
+ Set
.to_str();
4227 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
, BB
)
4230 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
,
4236 void Scop::addAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4237 AssumptionSign Sign
, BasicBlock
*BB
) {
4238 // Simplify the assumptions/restrictions first.
4239 Set
= Set
.gist_params(getContext());
4241 if (!trackAssumption(Kind
, Set
, Loc
, Sign
, BB
))
4244 if (Sign
== AS_ASSUMPTION
)
4245 AssumedContext
= AssumedContext
.intersect(Set
).coalesce();
4247 InvalidContext
= InvalidContext
.unite(Set
).coalesce();
4250 void Scop::recordAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4251 AssumptionSign Sign
, BasicBlock
*BB
) {
4252 assert((Set
.is_params() || BB
) &&
4253 "Assumptions without a basic block must be parameter sets");
4254 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4257 void Scop::addRecordedAssumptions() {
4258 while (!RecordedAssumptions
.empty()) {
4259 Assumption AS
= RecordedAssumptions
.pop_back_val();
4262 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
, nullptr /* BasicBlock */);
4266 // If the domain was deleted the assumptions are void.
4267 isl_set
*Dom
= getDomainConditions(AS
.BB
).release();
4271 // If a basic block was given use its domain to simplify the assumption.
4272 // In case of restrictions we know they only have to hold on the domain,
4273 // thus we can intersect them with the domain of the block. However, for
4274 // assumptions the domain has to imply them, thus:
4276 // Dom => S <==> A v B <==> A - B
4278 // To avoid the complement we will register A - B as a restriction not an
4280 isl_set
*S
= AS
.Set
.copy();
4281 if (AS
.Sign
== AS_RESTRICTION
)
4282 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4283 else /* (AS.Sign == AS_ASSUMPTION) */
4284 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4286 addAssumption(AS
.Kind
, isl::manage(S
), AS
.Loc
, AS_RESTRICTION
, AS
.BB
);
4290 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
, BasicBlock
*BB
) {
4291 LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind
<< "\n");
4292 addAssumption(Kind
, isl::set::empty(getParamSpace()), Loc
, AS_ASSUMPTION
, BB
);
4295 isl::set
Scop::getInvalidContext() const { return InvalidContext
; }
4297 void Scop::printContext(raw_ostream
&OS
) const {
4299 OS
.indent(4) << Context
<< "\n";
4301 OS
.indent(4) << "Assumed Context:\n";
4302 OS
.indent(4) << AssumedContext
<< "\n";
4304 OS
.indent(4) << "Invalid Context:\n";
4305 OS
.indent(4) << InvalidContext
<< "\n";
4308 for (const SCEV
*Parameter
: Parameters
)
4309 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4312 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4314 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4315 if (Pair
.second
.size() == 0)
4318 noOfGroups
+= Pair
.second
.size();
4321 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4322 if (MinMaxAliasGroups
.empty()) {
4323 OS
.indent(8) << "n/a\n";
4327 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4329 // If the group has no read only accesses print the write accesses.
4330 if (Pair
.second
.empty()) {
4331 OS
.indent(8) << "[[";
4332 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4333 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4339 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4340 OS
.indent(8) << "[[";
4341 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4342 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4343 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4351 void Scop::printStatements(raw_ostream
&OS
, bool PrintInstructions
) const {
4352 OS
<< "Statements {\n";
4354 for (const ScopStmt
&Stmt
: *this) {
4356 Stmt
.print(OS
, PrintInstructions
);
4359 OS
.indent(4) << "}\n";
4362 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4365 for (auto &Array
: arrays())
4368 OS
.indent(4) << "}\n";
4370 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4372 for (auto &Array
: arrays())
4373 Array
->print(OS
, /* SizeAsPwAff */ true);
4375 OS
.indent(4) << "}\n";
4378 void Scop::print(raw_ostream
&OS
, bool PrintInstructions
) const {
4379 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4380 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4381 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4382 OS
.indent(4) << "Invariant Accesses: {\n";
4383 for (const auto &IAClass
: InvariantEquivClasses
) {
4384 const auto &MAs
= IAClass
.InvariantAccesses
;
4386 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4388 MAs
.front()->print(OS
);
4389 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4393 OS
.indent(4) << "}\n";
4394 printContext(OS
.indent(4));
4395 printArrayInfo(OS
.indent(4));
4396 printAliasAssumptions(OS
);
4397 printStatements(OS
.indent(4), PrintInstructions
);
4400 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4401 LLVM_DUMP_METHOD
void Scop::dump() const { print(dbgs(), true); }
4404 isl::ctx
Scop::getIslCtx() const { return IslCtx
.get(); }
4406 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4408 // First try to use the SCEVAffinator to generate a piecewise defined
4409 // affine function from @p E in the context of @p BB. If that tasks becomes to
4410 // complex the affinator might return a nullptr. In such a case we invalidate
4411 // the SCoP and return a dummy value. This way we do not need to add error
4412 // handling code to all users of this function.
4413 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4415 // TODO: We could use a heuristic and either use:
4416 // SCEVAffinator::takeNonNegativeAssumption
4418 // SCEVAffinator::interpretAsUnsigned
4419 // to deal with unsigned or "NonNegative" SCEVs.
4421 Affinator
.takeNonNegativeAssumption(PWAC
);
4425 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4426 invalidate(COMPLEXITY
, DL
, BB
);
4427 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4430 isl::union_set
Scop::getDomains() const {
4431 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx().get(), 0);
4432 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4434 for (const ScopStmt
&Stmt
: *this)
4435 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4437 return isl::manage(Domain
);
4440 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4441 PWACtx PWAC
= getPwAff(E
, BB
);
4446 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4447 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4449 for (ScopStmt
&Stmt
: *this) {
4450 for (MemoryAccess
*MA
: Stmt
) {
4451 if (!Predicate(*MA
))
4454 isl::set Domain
= Stmt
.getDomain();
4455 isl::map AccessDomain
= MA
->getAccessRelation();
4456 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4457 Accesses
= Accesses
.add_map(AccessDomain
);
4461 return Accesses
.coalesce();
4464 isl::union_map
Scop::getMustWrites() {
4465 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4468 isl::union_map
Scop::getMayWrites() {
4469 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4472 isl::union_map
Scop::getWrites() {
4473 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4476 isl::union_map
Scop::getReads() {
4477 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4480 isl::union_map
Scop::getAccesses() {
4481 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4484 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4485 return getAccessesOfType(
4486 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4489 // Check whether @p Node is an extension node.
4491 // @return true if @p Node is an extension node.
4492 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4493 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4494 return isl_bool_error
;
4496 return isl_bool_true
;
4499 bool Scop::containsExtensionNode(isl::schedule Schedule
) {
4500 return isl_schedule_foreach_schedule_node_top_down(
4501 Schedule
.get(), isNotExtNode
, nullptr) == isl_stat_error
;
4504 isl::union_map
Scop::getSchedule() const {
4505 auto Tree
= getScheduleTree();
4506 if (containsExtensionNode(Tree
))
4509 return Tree
.get_map();
4512 isl::schedule
Scop::getScheduleTree() const {
4513 return Schedule
.intersect_domain(getDomains());
4516 void Scop::setSchedule(isl::union_map NewSchedule
) {
4517 auto S
= isl::schedule::from_domain(getDomains());
4518 Schedule
= S
.insert_partial_schedule(
4519 isl::multi_union_pw_aff::from_union_map(NewSchedule
));
4520 ScheduleModified
= true;
4523 void Scop::setScheduleTree(isl::schedule NewSchedule
) {
4524 Schedule
= NewSchedule
;
4525 ScheduleModified
= true;
4528 bool Scop::restrictDomains(isl::union_set Domain
) {
4529 bool Changed
= false;
4530 for (ScopStmt
&Stmt
: *this) {
4531 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4532 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4534 if (StmtDomain
.is_subset(NewStmtDomain
))
4539 NewStmtDomain
= NewStmtDomain
.coalesce();
4541 if (NewStmtDomain
.is_empty())
4542 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4544 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
4549 ScalarEvolution
*Scop::getSE() const { return SE
; }
4551 // Create an isl_multi_union_aff that defines an identity mapping from the
4552 // elements of USet to their N-th dimension.
4556 // Domain: { A[i,j]; B[i,j,k] }
4559 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4561 // @param USet A union set describing the elements for which to generate a
4563 // @param N The dimension to map to.
4564 // @returns A mapping from USet to its N-th dimension.
4565 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4568 assert(!USet
.is_empty());
4570 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4572 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4573 int Dim
= S
.dim(isl::dim::set
);
4574 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4577 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4579 Result
= Result
.add_pw_multi_aff(PMA
);
4580 return isl::stat::ok
;
4583 isl::stat Res
= USet
.foreach_set(Lambda
);
4586 assert(Res
== isl::stat::ok
);
4588 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4591 void Scop::addScopStmt(BasicBlock
*BB
, StringRef Name
, Loop
*SurroundingLoop
,
4592 std::vector
<Instruction
*> Instructions
) {
4593 assert(BB
&& "Unexpected nullptr!");
4594 Stmts
.emplace_back(*this, *BB
, Name
, SurroundingLoop
, Instructions
);
4595 auto *Stmt
= &Stmts
.back();
4596 StmtMap
[BB
].push_back(Stmt
);
4597 for (Instruction
*Inst
: Instructions
) {
4598 assert(!InstStmtMap
.count(Inst
) &&
4599 "Unexpected statement corresponding to the instruction.");
4600 InstStmtMap
[Inst
] = Stmt
;
4604 void Scop::addScopStmt(Region
*R
, StringRef Name
, Loop
*SurroundingLoop
,
4605 std::vector
<Instruction
*> Instructions
) {
4606 assert(R
&& "Unexpected nullptr!");
4607 Stmts
.emplace_back(*this, *R
, Name
, SurroundingLoop
, Instructions
);
4608 auto *Stmt
= &Stmts
.back();
4610 for (Instruction
*Inst
: Instructions
) {
4611 assert(!InstStmtMap
.count(Inst
) &&
4612 "Unexpected statement corresponding to the instruction.");
4613 InstStmtMap
[Inst
] = Stmt
;
4616 for (BasicBlock
*BB
: R
->blocks()) {
4617 StmtMap
[BB
].push_back(Stmt
);
4618 if (BB
== R
->getEntry())
4620 for (Instruction
&Inst
: *BB
) {
4621 assert(!InstStmtMap
.count(&Inst
) &&
4622 "Unexpected statement corresponding to the instruction.");
4623 InstStmtMap
[&Inst
] = Stmt
;
4628 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4631 isl::set SourceDomain
= SourceRel
.domain();
4632 isl::set TargetDomain
= TargetRel
.domain();
4633 assert(Domain
.is_subset(TargetDomain
) &&
4634 "Target access not defined for complete statement domain");
4635 assert(Domain
.is_subset(SourceDomain
) &&
4636 "Source access not defined for complete statement domain");
4638 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4640 return &(Stmts
.back());
4643 void Scop::buildSchedule(LoopInfo
&LI
) {
4644 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4645 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4646 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4647 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4648 Schedule
= LoopStack
[0].Schedule
;
4651 /// To generate a schedule for the elements in a Region we traverse the Region
4652 /// in reverse-post-order and add the contained RegionNodes in traversal order
4653 /// to the schedule of the loop that is currently at the top of the LoopStack.
4654 /// For loop-free codes, this results in a correct sequential ordering.
4665 /// Including loops requires additional processing. Whenever a loop header is
4666 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4667 /// from an empty schedule, we first process all RegionNodes that are within
4668 /// this loop and complete the sequential schedule at this loop-level before
4669 /// processing about any other nodes. To implement this
4670 /// loop-nodes-first-processing, the reverse post-order traversal is
4671 /// insufficient. Hence, we additionally check if the traversal yields
4672 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4673 /// These region-nodes are then queue and only traverse after the all nodes
4674 /// within the current loop have been processed.
4675 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4676 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4678 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4679 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4680 std::deque
<RegionNode
*> DelayList
;
4681 bool LastRNWaiting
= false;
4683 // Iterate over the region @p R in reverse post-order but queue
4684 // sub-regions/blocks iff they are not part of the last encountered but not
4685 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4686 // that we queued the last sub-region/block from the reverse post-order
4687 // iterator. If it is set we have to explore the next sub-region/block from
4688 // the iterator (if any) to guarantee progress. If it is not set we first try
4689 // the next queued sub-region/blocks.
4690 while (!WorkList
.empty() || !DelayList
.empty()) {
4693 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4694 RN
= WorkList
.front();
4695 WorkList
.pop_front();
4696 LastRNWaiting
= false;
4698 RN
= DelayList
.front();
4699 DelayList
.pop_front();
4702 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4706 Loop
*LastLoop
= LoopStack
.back().L
;
4707 if (LastLoop
!= L
) {
4708 if (LastLoop
&& !LastLoop
->contains(L
)) {
4709 LastRNWaiting
= true;
4710 DelayList
.push_back(RN
);
4713 LoopStack
.push_back({L
, nullptr, 0});
4715 buildSchedule(RN
, LoopStack
, LI
);
4719 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4720 if (RN
->isSubRegion()) {
4721 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
4722 if (!isNonAffineSubRegion(LocalRegion
)) {
4723 buildSchedule(LocalRegion
, LoopStack
, LI
);
4728 assert(LoopStack
.rbegin() != LoopStack
.rend());
4729 auto LoopData
= LoopStack
.rbegin();
4730 LoopData
->NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
4732 for (auto *Stmt
: getStmtListFor(RN
)) {
4733 isl::union_set UDomain
{Stmt
->getDomain()};
4734 auto StmtSchedule
= isl::schedule::from_domain(UDomain
);
4735 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, StmtSchedule
);
4738 // Check if we just processed the last node in this loop. If we did, finalize
4741 // - adding new schedule dimensions
4742 // - folding the resulting schedule into the parent loop schedule
4743 // - dropping the loop schedule from the LoopStack.
4745 // Then continue to check surrounding loops, which might also have been
4746 // completed by this node.
4747 size_t Dimension
= LoopStack
.size();
4748 while (LoopData
->L
&&
4749 LoopData
->NumBlocksProcessed
== getNumBlocksInLoop(LoopData
->L
)) {
4750 isl::schedule Schedule
= LoopData
->Schedule
;
4751 auto NumBlocksProcessed
= LoopData
->NumBlocksProcessed
;
4753 assert(std::next(LoopData
) != LoopStack
.rend());
4758 isl::union_set Domain
= Schedule
.get_domain();
4759 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, Dimension
);
4760 Schedule
= Schedule
.insert_partial_schedule(MUPA
);
4761 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, Schedule
);
4764 LoopData
->NumBlocksProcessed
+= NumBlocksProcessed
;
4766 // Now pop all loops processed up there from the LoopStack
4767 LoopStack
.erase(LoopStack
.begin() + Dimension
, LoopStack
.end());
4770 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
4771 auto StmtMapIt
= StmtMap
.find(BB
);
4772 if (StmtMapIt
== StmtMap
.end())
4774 return StmtMapIt
->second
;
4777 ScopStmt
*Scop::getIncomingStmtFor(const Use
&U
) const {
4778 auto *PHI
= cast
<PHINode
>(U
.getUser());
4779 BasicBlock
*IncomingBB
= PHI
->getIncomingBlock(U
);
4781 // If the value is a non-synthesizable from the incoming block, use the
4782 // statement that contains it as user statement.
4783 if (auto *IncomingInst
= dyn_cast
<Instruction
>(U
.get())) {
4784 if (IncomingInst
->getParent() == IncomingBB
) {
4785 if (ScopStmt
*IncomingStmt
= getStmtFor(IncomingInst
))
4786 return IncomingStmt
;
4790 // Otherwise, use the epilogue/last statement.
4791 return getLastStmtFor(IncomingBB
);
4794 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
4795 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
4796 if (!StmtList
.empty())
4797 return StmtList
.back();
4801 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
4802 if (RN
->isSubRegion())
4803 return getStmtListFor(RN
->getNodeAs
<Region
>());
4804 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
4807 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
4808 return getStmtListFor(R
->getEntry());
4811 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
4812 if (!L
|| !R
.contains(L
))
4814 // outermostLoopInRegion always returns nullptr for top level regions
4815 if (R
.isTopLevelRegion()) {
4816 // LoopInfo's depths start at 1, we start at 0
4817 return L
->getLoopDepth() - 1;
4819 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
4821 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
4825 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
4826 for (auto &SAI
: arrays()) {
4827 if (SAI
->getName() == BaseName
)
4833 void Scop::addAccessData(MemoryAccess
*Access
) {
4834 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
4835 assert(SAI
&& "can only use after access relations have been constructed");
4837 if (Access
->isOriginalValueKind() && Access
->isRead())
4838 ValueUseAccs
[SAI
].push_back(Access
);
4839 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
4840 PHIIncomingAccs
[SAI
].push_back(Access
);
4843 void Scop::removeAccessData(MemoryAccess
*Access
) {
4844 if (Access
->isOriginalValueKind() && Access
->isWrite()) {
4845 ValueDefAccs
.erase(Access
->getAccessValue());
4846 } else if (Access
->isOriginalValueKind() && Access
->isRead()) {
4847 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
4848 auto NewEnd
= std::remove(Uses
.begin(), Uses
.end(), Access
);
4849 Uses
.erase(NewEnd
, Uses
.end());
4850 } else if (Access
->isOriginalPHIKind() && Access
->isRead()) {
4851 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessInstruction());
4852 PHIReadAccs
.erase(PHI
);
4853 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
4854 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
4855 auto NewEnd
= std::remove(Incomings
.begin(), Incomings
.end(), Access
);
4856 Incomings
.erase(NewEnd
, Incomings
.end());
4860 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
4861 assert(SAI
->isValueKind());
4863 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
4867 return ValueDefAccs
.lookup(Val
);
4870 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
4871 assert(SAI
->isValueKind());
4872 auto It
= ValueUseAccs
.find(SAI
);
4873 if (It
== ValueUseAccs
.end())
4878 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
4879 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4881 if (SAI
->isExitPHIKind())
4884 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
4885 return PHIReadAccs
.lookup(PHI
);
4888 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
4889 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4890 auto It
= PHIIncomingAccs
.find(SAI
);
4891 if (It
== PHIIncomingAccs
.end())
4896 bool Scop::isEscaping(Instruction
*Inst
) {
4897 assert(contains(Inst
) && "The concept of escaping makes only sense for "
4898 "values defined inside the SCoP");
4900 for (Use
&Use
: Inst
->uses()) {
4901 BasicBlock
*UserBB
= getUseBlock(Use
);
4902 if (!contains(UserBB
))
4905 // When the SCoP region exit needs to be simplified, PHIs in the region exit
4906 // move to a new basic block such that its incoming blocks are not in the
4908 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
4909 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
4915 Scop::ScopStatistics
Scop::getStatistics() const {
4916 ScopStatistics Result
;
4917 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
4918 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
4920 int NumTotalLoops
= LoopStat
.NumLoops
;
4921 Result
.NumBoxedLoops
= getBoxedLoops().size();
4922 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
4924 for (const ScopStmt
&Stmt
: *this) {
4925 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
4926 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
4927 for (MemoryAccess
*MA
: Stmt
) {
4931 if (MA
->isLatestValueKind()) {
4932 Result
.NumValueWrites
+= 1;
4934 Result
.NumValueWritesInLoops
+= 1;
4937 if (MA
->isLatestAnyPHIKind()) {
4938 Result
.NumPHIWrites
+= 1;
4940 Result
.NumPHIWritesInLoops
+= 1;
4944 MA
->getAccessRelation().intersect_domain(Domain
).range();
4945 if (AccSet
.is_singleton()) {
4946 Result
.NumSingletonWrites
+= 1;
4948 Result
.NumSingletonWritesInLoops
+= 1;
4956 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
4957 scop
.print(OS
, PollyPrintInstructions
);
4961 //===----------------------------------------------------------------------===//
4962 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
4963 AU
.addRequired
<LoopInfoWrapperPass
>();
4964 AU
.addRequired
<RegionInfoPass
>();
4965 AU
.addRequired
<DominatorTreeWrapperPass
>();
4966 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
4967 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
4968 AU
.addRequired
<AAResultsWrapperPass
>();
4969 AU
.addRequired
<AssumptionCacheTracker
>();
4970 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
4971 AU
.setPreservesAll();
4974 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
4975 Scop::ScopStatistics ScopStats
) {
4976 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
4979 NumLoopsInScop
+= Stats
.NumLoops
;
4981 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
4983 if (Stats
.MaxDepth
== 0)
4984 NumScopsDepthZero
++;
4985 else if (Stats
.MaxDepth
== 1)
4987 else if (Stats
.MaxDepth
== 2)
4989 else if (Stats
.MaxDepth
== 3)
4990 NumScopsDepthThree
++;
4991 else if (Stats
.MaxDepth
== 4)
4992 NumScopsDepthFour
++;
4993 else if (Stats
.MaxDepth
== 5)
4994 NumScopsDepthFive
++;
4996 NumScopsDepthLarger
++;
4998 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
4999 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
5001 NumValueWrites
+= ScopStats
.NumValueWrites
;
5002 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
5003 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
5004 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
5005 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
5006 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
5009 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
5010 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5012 if (!SD
.isMaxRegionInScop(*R
))
5015 Function
*F
= R
->getEntry()->getParent();
5016 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5017 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5018 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5019 auto const &DL
= F
->getParent()->getDataLayout();
5020 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5021 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
5022 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5024 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5025 S
= SB
.getScop(); // take ownership of scop object
5027 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5029 ScopDetection::LoopStats Stats
=
5030 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5031 updateLoopCountStatistic(Stats
, S
->getStatistics());
5038 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5040 S
->print(OS
, PollyPrintInstructions
);
5042 OS
<< "Invalid Scop!\n";
5045 char ScopInfoRegionPass::ID
= 0;
5047 Pass
*polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5049 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5050 "Polly - Create polyhedral description of Scops", false,
5052 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5053 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5054 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5055 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5056 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5057 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5058 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5059 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5060 "Polly - Create polyhedral description of Scops", false,
5063 //===----------------------------------------------------------------------===//
5064 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5065 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5066 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5067 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5071 void ScopInfo::recompute() {
5072 RegionToScopMap
.clear();
5073 /// Create polyhedral description of scops for all the valid regions of a
5075 for (auto &It
: SD
) {
5076 Region
*R
= const_cast<Region
*>(It
);
5077 if (!SD
.isMaxRegionInScop(*R
))
5080 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5081 std::unique_ptr
<Scop
> S
= SB
.getScop();
5084 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5085 ScopDetection::LoopStats Stats
=
5086 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5087 updateLoopCountStatistic(Stats
, S
->getStatistics());
5089 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5090 assert(Inserted
&& "Building Scop for the same region twice!");
5095 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5096 FunctionAnalysisManager::Invalidator
&Inv
) {
5097 // Check whether the analysis, all analyses on functions have been preserved
5098 // or anything we're holding references to is being invalidated
5099 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5100 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5101 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5102 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5103 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5104 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5105 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5106 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
5109 AnalysisKey
ScopInfoAnalysis::Key
;
5111 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5112 FunctionAnalysisManager
&FAM
) {
5113 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5114 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5115 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5116 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5117 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5118 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5119 auto &DL
= F
.getParent()->getDataLayout();
5120 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5121 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5124 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5125 FunctionAnalysisManager
&FAM
) {
5126 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5127 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5128 // order here to keep the output persistent
5129 for (auto &It
: reverse(SI
)) {
5131 It
.second
->print(Stream
, PollyPrintInstructions
);
5133 Stream
<< "Invalid Scop!\n";
5135 return PreservedAnalyses::all();
5138 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5139 AU
.addRequired
<LoopInfoWrapperPass
>();
5140 AU
.addRequired
<RegionInfoPass
>();
5141 AU
.addRequired
<DominatorTreeWrapperPass
>();
5142 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5143 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5144 AU
.addRequired
<AAResultsWrapperPass
>();
5145 AU
.addRequired
<AssumptionCacheTracker
>();
5146 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5147 AU
.setPreservesAll();
5150 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5151 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5152 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5153 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5154 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5155 auto const &DL
= F
.getParent()->getDataLayout();
5156 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5157 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5158 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5160 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5164 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5165 for (auto &It
: *Result
) {
5167 It
.second
->print(OS
, PollyPrintInstructions
);
5169 OS
<< "Invalid Scop!\n";
5173 char ScopInfoWrapperPass::ID
= 0;
5175 Pass
*polly::createScopInfoWrapperPassPass() {
5176 return new ScopInfoWrapperPass();
5179 INITIALIZE_PASS_BEGIN(
5180 ScopInfoWrapperPass
, "polly-function-scops",
5181 "Polly - Create polyhedral description of all Scops of a function", false,
5183 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5184 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5185 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5186 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5187 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5188 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5189 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5190 INITIALIZE_PASS_END(
5191 ScopInfoWrapperPass
, "polly-function-scops",
5192 "Polly - Create polyhedral description of all Scops of a function", false,