1 //===- ScopInfo.cpp -------------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the Scop
13 // detection derived from their LLVM-IR code.
15 // This representation is shared among several tools in the polyhedral
16 // community, which are e.g. Cloog, Pluto, Loopo, Graphite.
18 //===----------------------------------------------------------------------===//
20 #include "polly/ScopInfo.h"
21 #include "polly/LinkAllPasses.h"
22 #include "polly/Options.h"
23 #include "polly/ScopBuilder.h"
24 #include "polly/ScopDetection.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/ISLOStream.h"
27 #include "polly/Support/ISLTools.h"
28 #include "polly/Support/SCEVAffinator.h"
29 #include "polly/Support/SCEVValidator.h"
30 #include "polly/Support/ScopHelper.h"
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/DenseSet.h"
35 #include "llvm/ADT/PostOrderIterator.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
42 #include "llvm/ADT/StringExtras.h"
43 #include "llvm/ADT/StringMap.h"
44 #include "llvm/Analysis/AliasAnalysis.h"
45 #include "llvm/Analysis/AliasSetTracker.h"
46 #include "llvm/Analysis/AssumptionCache.h"
47 #include "llvm/Analysis/Loads.h"
48 #include "llvm/Analysis/LoopInfo.h"
49 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
50 #include "llvm/Analysis/RegionInfo.h"
51 #include "llvm/Analysis/RegionIterator.h"
52 #include "llvm/Analysis/ScalarEvolution.h"
53 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
54 #include "llvm/IR/Argument.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/CFG.h"
57 #include "llvm/IR/ConstantRange.h"
58 #include "llvm/IR/Constants.h"
59 #include "llvm/IR/DataLayout.h"
60 #include "llvm/IR/DebugLoc.h"
61 #include "llvm/IR/DerivedTypes.h"
62 #include "llvm/IR/DiagnosticInfo.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/InstrTypes.h"
66 #include "llvm/IR/Instruction.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/IntrinsicInst.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/PassManager.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/User.h"
74 #include "llvm/IR/Value.h"
75 #include "llvm/Pass.h"
76 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/ErrorHandling.h"
81 #include "llvm/Support/MathExtras.h"
82 #include "llvm/Support/raw_ostream.h"
84 #include "isl/constraint.h"
85 #include "isl/local_space.h"
87 #include "isl/options.h"
88 #include "isl/printer.h"
89 #include "isl/schedule.h"
90 #include "isl/schedule_node.h"
92 #include "isl/union_map.h"
93 #include "isl/union_set.h"
// Pull the LLVM and Polly namespaces into scope; this file implements Polly's
// polyhedral SCoP description on top of LLVM-IR analyses.
using namespace llvm;
using namespace polly;

// Tag used by the STATISTIC/LLVM_DEBUG machinery below.
#define DEBUG_TYPE "polly-scops"
112 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
113 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
114 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
115 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
116 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
117 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
118 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
119 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
120 STATISTIC(AssumptionsInvariantLoad
,
121 "Number of invariant loads assumptions taken.");
122 STATISTIC(AssumptionsDelinearization
,
123 "Number of delinearization assumptions taken.");
125 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
126 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
127 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
128 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
130 STATISTIC(NumScopsDepthZero
, "Number of scops with maximal loop depth 0");
131 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
132 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
133 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
134 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
135 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
136 STATISTIC(NumScopsDepthLarger
,
137 "Number of scops with maximal loop depth 6 and larger");
138 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
140 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
142 NumValueWritesInLoops
,
143 "Number of scalar value writes nested in affine loops after ScopInfo");
144 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
145 STATISTIC(NumPHIWritesInLoops
,
146 "Number of scalar phi writes nested in affine loops after ScopInfo");
147 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
148 STATISTIC(NumSingletonWritesInLoops
,
149 "Number of singleton writes nested in affine loops after ScopInfo");
151 // The maximal number of basic sets we allow during domain construction to
152 // be created. More complex scops will result in very high compile time and
153 // are also unlikely to result in good code
154 static int const MaxDisjunctsInDomain
= 20;
156 // The number of disjunct in the context after which we stop to add more
157 // disjuncts. This parameter is there to avoid exponential growth in the
158 // number of disjunct when adding non-convex sets to the context.
159 static int const MaxDisjunctsInContext
= 4;
161 // The maximal number of dimensions we allow during invariant load construction.
162 // More complex access ranges will result in very high compile time and are also
163 // unlikely to result in good code. This value is very high and should only
164 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
165 static int const MaxDimensionsInAccessRange
= 9;
168 OptComputeOut("polly-analysis-computeout",
169 cl::desc("Bound the scop analysis by a maximal amount of "
170 "computational steps (0 means no bound)"),
171 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
172 cl::cat(PollyCategory
));
174 static cl::opt
<bool> PollyRemarksMinimal(
175 "polly-remarks-minimal",
176 cl::desc("Do not emit remarks about assumptions that are known"),
177 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
179 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
180 "polly-rtc-max-array-disjuncts",
181 cl::desc("The maximal number of disjunts allowed in memory accesses to "
183 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
185 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
186 "polly-rtc-max-parameters",
187 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
188 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
190 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
191 "polly-rtc-max-arrays-per-group",
192 cl::desc("The maximal number of arrays to compare in each alias group."),
193 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
195 static cl::opt
<std::string
> UserContextStr(
196 "polly-context", cl::value_desc("isl parameter set"),
197 cl::desc("Provide additional constraints on the context parameters"),
198 cl::init(""), cl::cat(PollyCategory
));
201 IslOnErrorAbort("polly-on-isl-error-abort",
202 cl::desc("Abort if an isl error is encountered"),
203 cl::init(true), cl::cat(PollyCategory
));
205 static cl::opt
<bool> PollyPreciseInbounds(
206 "polly-precise-inbounds",
207 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
208 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
211 PollyIgnoreInbounds("polly-ignore-inbounds",
212 cl::desc("Do not take inbounds assumptions at all"),
213 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
215 static cl::opt
<bool> PollyIgnoreParamBounds(
216 "polly-ignore-parameter-bounds",
218 "Do not add parameter bounds and do no gist simplify sets accordingly"),
219 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
221 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
222 "polly-allow-dereference-of-all-function-parameters",
224 "Treat all parameters to functions that are pointers as dereferencible."
225 " This is useful for invariant load hoisting, since we can generate"
226 " less runtime checks. This is only valid if all pointers to functions"
227 " are always initialized, so that Polly can choose to hoist"
229 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
231 static cl::opt
<bool> PollyPreciseFoldAccesses(
232 "polly-precise-fold-accesses",
233 cl::desc("Fold memory accesses to model more possible delinearizations "
234 "(does not scale well)"),
235 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
237 bool polly::UseInstructionNames
;
239 static cl::opt
<bool, true> XUseInstructionNames(
240 "polly-use-llvm-names",
241 cl::desc("Use LLVM-IR names when deriving statement names"),
242 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
243 cl::ZeroOrMore
, cl::cat(PollyCategory
));
245 static cl::opt
<bool> PollyPrintInstructions(
246 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
247 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
249 //===----------------------------------------------------------------------===//
251 // Create a sequence of two schedules. Either argument may be null and is
252 // interpreted as the empty schedule. Can also return null if both schedules are
254 static isl::schedule
combineInSequence(isl::schedule Prev
, isl::schedule Succ
) {
260 return Prev
.sequence(Succ
);
263 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
264 int dim
, isl::dim type
) {
266 isl::ctx Ctx
= S
.get_ctx();
268 // The upper and lower bound for a parameter value is derived either from
269 // the data type of the parameter or from the - possibly more restrictive -
271 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
272 S
= S
.lower_bound_val(type
, dim
, V
);
273 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
274 S
= S
.upper_bound_val(type
, dim
, V
);
276 if (Range
.isFullSet())
279 if (S
.n_basic_set() > MaxDisjunctsInContext
)
282 // In case of signed wrapping, we can refine the set of valid values by
283 // excluding the part not covered by the wrapping range.
284 if (Range
.isSignWrappedSet()) {
285 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
286 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
288 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
290 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
297 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
298 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
302 if (!S
->contains(BasePtrLI
))
305 ScalarEvolution
&SE
= *S
->getSE();
307 auto *OriginBaseSCEV
=
308 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
312 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
313 if (!OriginBaseSCEVUnknown
)
316 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
320 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
321 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
322 const DataLayout
&DL
, Scop
*S
,
323 const char *BaseName
)
324 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
325 std::string BasePtrName
=
327 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
328 Kind
== MemoryKind::PHI
? "__phi" : "",
329 UseInstructionNames
);
330 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
334 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
335 BasePtrOriginSAI
= nullptr;
339 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
340 if (BasePtrOriginSAI
)
341 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
// Out-of-line defaulted destructor; ScopArrayInfo holds no raw resources.
ScopArrayInfo::~ScopArrayInfo() = default;
346 isl::space
ScopArrayInfo::getSpace() const {
347 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
348 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
352 bool ScopArrayInfo::isReadOnly() {
353 isl::union_set WriteSet
= S
.getWrites().range();
354 isl::space Space
= getSpace();
355 WriteSet
= WriteSet
.extract_set(Space
);
357 return bool(WriteSet
.is_empty());
360 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
361 if (Array
->getElementType() != getElementType())
364 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
367 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
368 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
374 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
375 if (NewElementType
== ElementType
)
378 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
379 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
381 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
384 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
385 ElementType
= NewElementType
;
387 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
388 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
392 /// Make the ScopArrayInfo model a Fortran Array
393 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
394 assert(FAD
&& "got invalid Fortran array descriptor");
396 assert(this->FAD
== FAD
&&
397 "receiving different array descriptors for same array");
401 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
405 isl::space
Space(S
.getIslCtx(), 1, 0);
407 std::string param_name
= getName();
408 param_name
+= "_fortranarr_size";
409 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
411 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
413 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
415 DimensionSizesPw
[0] = PwAff
;
418 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
419 bool CheckConsistency
) {
420 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
421 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
422 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
424 if (CheckConsistency
) {
425 for (int i
= 0; i
< SharedDims
; i
++) {
426 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
427 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
428 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
432 if (DimensionSizes
.size() >= NewSizes
.size())
436 DimensionSizes
.clear();
437 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
439 DimensionSizesPw
.clear();
440 for (const SCEV
*Expr
: DimensionSizes
) {
442 DimensionSizesPw
.push_back(nullptr);
445 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
446 DimensionSizesPw
.push_back(Size
);
451 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
453 int ScopArrayInfo::getElemSizeInBytes() const {
454 return DL
.getTypeAllocSize(ElementType
);
457 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
459 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
460 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
463 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
464 OS
.indent(8) << *getElementType() << " " << getName();
466 // If this is a Fortran array, then we can print the outermost dimension
467 // as a isl_pw_aff even though there is no SCEV information.
468 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
470 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
471 !getDimensionSize(0)) {
475 for (; u
< getNumberOfDimensions(); u
++) {
479 isl::pw_aff Size
= getDimensionSizePw(u
);
480 OS
<< " " << Size
<< " ";
482 OS
<< *getDimensionSize(u
);
490 if (BasePtrOriginSAI
)
491 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
493 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
496 const ScopArrayInfo
*
497 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
498 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
499 assert(!Id
.is_null() && "Output dimension didn't have an ID");
500 return getFromId(Id
);
503 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
504 void *User
= Id
.get_user();
505 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
509 void MemoryAccess::wrapConstantDimensions() {
510 auto *SAI
= getScopArrayInfo();
511 isl::space ArraySpace
= SAI
->getSpace();
512 isl::ctx Ctx
= ArraySpace
.get_ctx();
513 unsigned DimsArray
= SAI
->getNumberOfDimensions();
515 isl::multi_aff DivModAff
= isl::multi_aff::identity(
516 ArraySpace
.map_from_domain_and_range(ArraySpace
));
517 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
519 // Begin with last dimension, to iteratively carry into higher dimensions.
520 for (int i
= DimsArray
- 1; i
> 0; i
--) {
521 auto *DimSize
= SAI
->getDimensionSize(i
);
522 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
524 // This transformation is not applicable to dimensions with dynamic size.
528 // This transformation is not applicable to dimensions of size zero.
529 if (DimSize
->isZero())
532 isl::val DimSizeVal
=
533 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
534 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
536 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
538 // Compute: index % size
539 // Modulo must apply in the divide of the previous iteration, if any.
540 isl::aff Modulo
= Var
.mod(DimSizeVal
);
541 Modulo
= Modulo
.pullback(DivModAff
);
543 // Compute: floor(index / size)
544 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
545 Divide
= Divide
.floor();
546 Divide
= Divide
.add(PrevVar
);
547 Divide
= Divide
.pullback(DivModAff
);
549 // Apply Modulo and Divide.
550 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
551 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
554 // Apply all modulo/divides on the accesses.
555 isl::map Relation
= AccessRelation
;
556 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
557 Relation
= Relation
.detect_equalities();
558 AccessRelation
= Relation
;
561 void MemoryAccess::updateDimensionality() {
562 auto *SAI
= getScopArrayInfo();
563 isl::space ArraySpace
= SAI
->getSpace();
564 isl::space AccessSpace
= AccessRelation
.get_space().range();
565 isl::ctx Ctx
= ArraySpace
.get_ctx();
567 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
568 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
569 auto DimsMissing
= DimsArray
- DimsAccess
;
571 auto *BB
= getStatement()->getEntryBlock();
572 auto &DL
= BB
->getModule()->getDataLayout();
573 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
574 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
576 isl::map Map
= isl::map::from_domain_and_range(
577 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
579 for (unsigned i
= 0; i
< DimsMissing
; i
++)
580 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
582 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
583 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
585 AccessRelation
= AccessRelation
.apply_range(Map
);
587 // For the non delinearized arrays, divide the access function of the last
588 // subscript by the size of the elements in the array.
590 // A stride one array access in C expressed as A[i] is expressed in
591 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
592 // two subsequent values of 'i' index two values that are stored next to
593 // each other in memory. By this division we make this characteristic
594 // obvious again. If the base pointer was accessed with offsets not divisible
595 // by the accesses element size, we will have chosen a smaller ArrayElemSize
596 // that divides the offsets of all accesses to this base pointer.
597 if (DimsAccess
== 1) {
598 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
599 AccessRelation
= AccessRelation
.floordiv_val(V
);
602 // We currently do this only if we added at least one dimension, which means
603 // some dimension's indices have not been specified, an indicator that some
604 // index values have been added together.
605 // TODO: Investigate general usefulness; Effect on unit tests is to make index
606 // expressions more complicated.
608 wrapConstantDimensions();
611 computeBoundsOnAccessRelation(ArrayElemSize
);
613 // Introduce multi-element accesses in case the type loaded by this memory
614 // access is larger than the canonical element type of the array.
616 // An access ((float *)A)[i] to an array char *A is modeled as
617 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
618 if (ElemBytes
> ArrayElemSize
) {
619 assert(ElemBytes
% ArrayElemSize
== 0 &&
620 "Loaded element size should be multiple of canonical element size");
621 isl::map Map
= isl::map::from_domain_and_range(
622 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
623 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
624 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
629 LS
= isl::local_space(Map
.get_space());
630 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
632 C
= isl::constraint::alloc_inequality(LS
);
633 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
634 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
635 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
636 Map
= Map
.add_constraint(C
);
638 C
= isl::constraint::alloc_inequality(LS
);
639 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
640 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
641 C
= C
.set_constant_val(isl::val(Ctx
, 0));
642 Map
= Map
.add_constraint(C
);
643 AccessRelation
= AccessRelation
.apply_range(Map
);
648 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
650 case MemoryAccess::RT_NONE
:
651 llvm_unreachable("Requested a reduction operator string for a memory "
652 "access which isn't a reduction");
653 case MemoryAccess::RT_ADD
:
655 case MemoryAccess::RT_MUL
:
657 case MemoryAccess::RT_BOR
:
659 case MemoryAccess::RT_BXOR
:
661 case MemoryAccess::RT_BAND
:
664 llvm_unreachable("Unknown reduction type");
667 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
668 isl::id ArrayId
= getArrayId();
669 void *User
= ArrayId
.get_user();
670 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
674 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
675 isl::id ArrayId
= getLatestArrayId();
676 void *User
= ArrayId
.get_user();
677 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
681 isl::id
MemoryAccess::getOriginalArrayId() const {
682 return AccessRelation
.get_tuple_id(isl::dim::out
);
685 isl::id
MemoryAccess::getLatestArrayId() const {
686 if (!hasNewAccessRelation())
687 return getOriginalArrayId();
688 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
691 isl::map
MemoryAccess::getAddressFunction() const {
692 return getAccessRelation().lexmin();
696 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
697 isl::map Schedule
, ScheduledAccRel
;
698 isl::union_set UDomain
;
700 UDomain
= getStatement()->getDomain();
701 USchedule
= USchedule
.intersect_domain(UDomain
);
702 Schedule
= isl::map::from_union_map(USchedule
);
703 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
704 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
707 isl::map
MemoryAccess::getOriginalAccessRelation() const {
708 return AccessRelation
;
711 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
712 return AccessRelation
.to_str();
715 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
716 return AccessRelation
.get_space();
719 isl::map
MemoryAccess::getNewAccessRelation() const {
720 return NewAccessRelation
;
723 std::string
MemoryAccess::getNewAccessRelationStr() const {
724 return NewAccessRelation
.to_str();
727 std::string
MemoryAccess::getAccessRelationStr() const {
728 return getAccessRelation().to_str();
731 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
732 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
733 Space
= Space
.align_params(Statement
->getDomainSpace());
735 return isl::basic_map::from_domain_and_range(
736 isl::basic_set::universe(Statement
->getDomainSpace()),
737 isl::basic_set::universe(Space
));
740 // Formalize no out-of-bound access assumption
742 // When delinearizing array accesses we optimistically assume that the
743 // delinearized accesses do not access out of bound locations (the subscript
744 // expression of each array evaluates for each statement instance that is
745 // executed to a value that is larger than zero and strictly smaller than the
746 // size of the corresponding dimension). The only exception is the outermost
747 // dimension for which we do not need to assume any upper bound. At this point
748 // we formalize this assumption to ensure that at code generation time the
749 // relevant run-time checks can be generated.
751 // To find the set of constraints necessary to avoid out of bound accesses, we
752 // first build the set of data locations that are not within array bounds. We
753 // then apply the reverse access relation to obtain the set of iterations that
754 // may contain invalid accesses and reduce this set of iterations to the ones
755 // that are actually executed by intersecting them with the domain of the
756 // statement. If we now project out all loop dimensions, we obtain a set of
757 // parameters that may cause statement instances to be executed that may
758 // possibly yield out of bound memory accesses. The complement of these
759 // constraints is the set of constraints that needs to be assumed to ensure such
760 // statement instances are never executed.
761 void MemoryAccess::assumeNoOutOfBound() {
762 if (PollyIgnoreInbounds
)
764 auto *SAI
= getScopArrayInfo();
765 isl::space Space
= getOriginalAccessRelationSpace().range();
766 isl::set Outside
= isl::set::empty(Space
);
767 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
768 isl::local_space
LS(Space
);
769 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
770 isl::pw_aff Zero
= isl::pw_aff(LS
);
772 isl::set DimOutside
= Var
.lt_set(Zero
);
773 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
774 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
775 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
776 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
778 Outside
= Outside
.unite(DimOutside
);
781 Outside
= Outside
.apply(getAccessRelation().reverse());
782 Outside
= Outside
.intersect(Statement
->getDomain());
783 Outside
= Outside
.params();
785 // Remove divs to avoid the construction of overly complicated assumptions.
786 // Doing so increases the set of parameter combinations that are assumed to
787 // not appear. This is always save, but may make the resulting run-time check
788 // bail out more often than strictly necessary.
789 Outside
= Outside
.remove_divs();
790 Outside
= Outside
.complement();
791 const auto &Loc
= getAccessInstruction()
792 ? getAccessInstruction()->getDebugLoc()
794 if (!PollyPreciseInbounds
)
795 Outside
= Outside
.gist_params(Statement
->getDomain().params());
796 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
, Loc
,
800 void MemoryAccess::buildMemIntrinsicAccessRelation() {
801 assert(isMemoryIntrinsic());
802 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
804 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
805 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
808 if (Subscripts
[1] == nullptr) {
809 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
811 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
812 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
813 isl::space RangeSpace
= LengthMap
.get_space().range();
814 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
816 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
817 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
818 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
819 LengthMap
= LengthMap
.sum(SubscriptMap
);
821 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
824 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
825 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
827 auto MAI
= MemAccInst(getAccessInstruction());
828 if (isa
<MemIntrinsic
>(MAI
))
831 Value
*Ptr
= MAI
.getPointerOperand();
832 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
835 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
836 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
839 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
840 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
841 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
843 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
844 if (Range
.isFullSet())
847 if (Range
.isWrappedSet() || Range
.isSignWrappedSet())
850 bool isWrapping
= Range
.isSignWrappedSet();
852 unsigned BW
= Range
.getBitWidth();
853 const auto One
= APInt(BW
, 1);
854 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
855 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
857 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
858 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
860 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
862 isl::map Relation
= AccessRelation
;
863 isl::set AccessRange
= Relation
.range();
864 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
866 AccessRelation
= Relation
.intersect_range(AccessRange
);
869 void MemoryAccess::foldAccessRelation() {
870 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
873 int Size
= Subscripts
.size();
875 isl::map NewAccessRelation
= AccessRelation
;
877 for (int i
= Size
- 2; i
>= 0; --i
) {
879 isl::map MapOne
, MapTwo
;
880 isl::pw_aff DimSize
= getPwAff(Sizes
[i
+ 1]);
882 isl::space SpaceSize
= DimSize
.get_space();
883 isl::id ParamId
= SpaceSize
.get_dim_id(isl::dim::param
, 0);
885 Space
= AccessRelation
.get_space();
886 Space
= Space
.range().map_from_set();
887 Space
= Space
.align_params(SpaceSize
);
889 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
891 MapOne
= isl::map::universe(Space
);
892 for (int j
= 0; j
< Size
; ++j
)
893 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
894 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
896 MapTwo
= isl::map::universe(Space
);
897 for (int j
= 0; j
< Size
; ++j
)
898 if (j
< i
|| j
> i
+ 1)
899 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
901 isl::local_space
LS(Space
);
903 C
= isl::constraint::alloc_equality(LS
);
904 C
= C
.set_constant_si(-1);
905 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
906 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
907 MapTwo
= MapTwo
.add_constraint(C
);
908 C
= isl::constraint::alloc_equality(LS
);
909 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
910 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
911 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
912 MapTwo
= MapTwo
.add_constraint(C
);
913 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
915 MapOne
= MapOne
.unite(MapTwo
);
916 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
919 isl::id BaseAddrId
= getScopArrayInfo()->getBasePtrId();
920 isl::space Space
= Statement
->getDomainSpace();
921 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
922 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
923 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
924 NewAccessRelation
= NewAccessRelation
.gist_domain(Statement
->getDomain());
926 // Access dimension folding might in certain cases increase the number of
927 // disjuncts in the memory access, which can possibly complicate the generated
928 // run-time checks and can lead to costly compilation.
929 if (!PollyPreciseFoldAccesses
&&
930 isl_map_n_basic_map(NewAccessRelation
.get()) >
931 isl_map_n_basic_map(AccessRelation
.get())) {
933 AccessRelation
= NewAccessRelation
;
937 /// Check if @p Expr is divisible by @p Size.
938 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
943 // Only one factor needs to be divisible.
944 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
945 for (auto *FactorExpr
: MulExpr
->operands())
946 if (isDivisible(FactorExpr
, Size
, SE
))
951 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
953 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
954 for (auto *OpExpr
: NAryExpr
->operands())
955 if (!isDivisible(OpExpr
, Size
, SE
))
960 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
961 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
962 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
963 return MulSCEV
== Expr
;
966 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
967 assert(AccessRelation
.is_null() && "AccessRelation already built");
969 // Initialize the invalid domain which describes all iterations for which the
970 // access relation is not modeled correctly.
971 isl::set StmtInvalidDomain
= getStatement()->getInvalidDomain();
972 InvalidDomain
= isl::set::empty(StmtInvalidDomain
.get_space());
974 isl::ctx Ctx
= Id
.get_ctx();
975 isl::id BaseAddrId
= SAI
->getBasePtrId();
977 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
978 buildMemIntrinsicAccessRelation();
979 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
984 // We overapproximate non-affine accesses with a possible access to the
985 // whole array. For read accesses it does not make a difference, if an
986 // access must or may happen. However, for write accesses it is important to
987 // differentiate between writes that must happen and writes that may happen.
988 if (AccessRelation
.is_null())
989 AccessRelation
= createBasicAccessMap(Statement
);
991 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
995 isl::space Space
= isl::space(Ctx
, 0, Statement
->getNumIterators(), 0);
996 AccessRelation
= isl::map::universe(Space
);
998 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
999 isl::pw_aff Affine
= getPwAff(Subscripts
[i
]);
1000 isl::map SubscriptMap
= isl::map::from_pw_aff(Affine
);
1001 AccessRelation
= AccessRelation
.flat_range_product(SubscriptMap
);
1004 Space
= Statement
->getDomainSpace();
1005 AccessRelation
= AccessRelation
.set_tuple_id(
1006 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
1007 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1009 AccessRelation
= AccessRelation
.gist_domain(Statement
->getDomain());
1012 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1013 AccessType AccType
, Value
*BaseAddress
,
1014 Type
*ElementType
, bool Affine
,
1015 ArrayRef
<const SCEV
*> Subscripts
,
1016 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1018 : Kind(Kind
), AccType(AccType
), Statement(Stmt
), InvalidDomain(nullptr),
1019 BaseAddr(BaseAddress
), ElementType(ElementType
),
1020 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1021 AccessValue(AccessValue
), IsAffine(Affine
),
1022 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1023 NewAccessRelation(nullptr), FAD(nullptr) {
1024 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1025 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1027 std::string IdName
= Stmt
->getBaseName() + Access
;
1028 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1031 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
, isl::map AccRel
)
1032 : Kind(MemoryKind::Array
), AccType(AccType
), Statement(Stmt
),
1033 InvalidDomain(nullptr), AccessRelation(nullptr),
1034 NewAccessRelation(AccRel
), FAD(nullptr) {
1035 isl::id ArrayInfoId
= NewAccessRelation
.get_tuple_id(isl::dim::out
);
1036 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1037 Sizes
.push_back(nullptr);
1038 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1039 Sizes
.push_back(SAI
->getDimensionSize(i
));
1040 ElementType
= SAI
->getElementType();
1041 BaseAddr
= SAI
->getBasePtr();
1042 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1043 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1045 std::string IdName
= Stmt
->getBaseName() + Access
;
1046 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
// All members are self-managing (isl C++ wrappers, vectors), so the
// defaulted destructor suffices.
MemoryAccess::~MemoryAccess() = default;
1051 void MemoryAccess::realignParams() {
1052 isl::set Ctx
= Statement
->getParent()->getContext();
1053 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1054 AccessRelation
= AccessRelation
.gist_params(Ctx
);
1057 const std::string
MemoryAccess::getReductionOperatorStr() const {
1058 return MemoryAccess::getReductionOperatorStr(getReductionType());
// Return the isl id that uniquely identifies this memory access.
isl::id MemoryAccess::getId() const { return Id; }
1063 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1064 MemoryAccess::ReductionType RT
) {
1065 if (RT
== MemoryAccess::RT_NONE
)
1068 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
// Record the Fortran array descriptor value associated with this access.
void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
1074 void MemoryAccess::print(raw_ostream
&OS
) const {
1077 OS
.indent(12) << "ReadAccess :=\t";
1080 OS
.indent(12) << "MustWriteAccess :=\t";
1083 OS
.indent(12) << "MayWriteAccess :=\t";
1087 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1090 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1094 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1095 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1096 if (hasNewAccessRelation())
1097 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper: print this access to stderr.
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
1104 isl::pw_aff
MemoryAccess::getPwAff(const SCEV
*E
) {
1105 auto *Stmt
= getStatement();
1106 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1107 isl::set StmtDom
= getStatement()->getDomain();
1108 StmtDom
= StmtDom
.reset_tuple_id();
1109 isl::set NewInvalidDom
= StmtDom
.intersect(PWAC
.second
);
1110 InvalidDomain
= InvalidDomain
.unite(NewInvalidDom
);
1114 // Create a map in the size of the provided set domain, that maps from the
1115 // one element of the provided set domain to another element of the provided
1117 // The mapping is limited to all points that are equal in all but the last
1118 // dimension and for which the last dimension of the input is strict smaller
1119 // than the last dimension of the output.
1121 // getEqualAndLarger(set[i0, i1, ..., iX]):
1123 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1124 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1126 static isl::map
getEqualAndLarger(isl::space SetDomain
) {
1127 isl::space Space
= SetDomain
.map_from_set();
1128 isl::map Map
= isl::map::universe(Space
);
1129 unsigned lastDimension
= Map
.dim(isl::dim::in
) - 1;
1131 // Set all but the last dimension to be equal for the input and output
1133 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1134 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1135 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1136 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
1138 // Set the last dimension of the input to be strict smaller than the
1139 // last dimension of the output.
1141 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1142 Map
= Map
.order_lt(isl::dim::in
, lastDimension
, isl::dim::out
, lastDimension
);
1146 isl::set
MemoryAccess::getStride(isl::map Schedule
) const {
1147 isl::map AccessRelation
= getAccessRelation();
1148 isl::space Space
= Schedule
.get_space().range();
1149 isl::map NextScatt
= getEqualAndLarger(Space
);
1151 Schedule
= Schedule
.reverse();
1152 NextScatt
= NextScatt
.lexmin();
1154 NextScatt
= NextScatt
.apply_range(Schedule
);
1155 NextScatt
= NextScatt
.apply_range(AccessRelation
);
1156 NextScatt
= NextScatt
.apply_domain(Schedule
);
1157 NextScatt
= NextScatt
.apply_domain(AccessRelation
);
1159 isl::set Deltas
= NextScatt
.deltas();
1163 bool MemoryAccess::isStrideX(isl::map Schedule
, int StrideWidth
) const {
1164 isl::set Stride
, StrideX
;
1167 Stride
= getStride(Schedule
);
1168 StrideX
= isl::set::universe(Stride
.get_space());
1169 for (unsigned i
= 0; i
< StrideX
.dim(isl::dim::set
) - 1; i
++)
1170 StrideX
= StrideX
.fix_si(isl::dim::set
, i
, 0);
1171 StrideX
= StrideX
.fix_si(isl::dim::set
, StrideX
.dim(isl::dim::set
) - 1,
1173 IsStrideX
= Stride
.is_subset(StrideX
);
1178 bool MemoryAccess::isStrideZero(isl::map Schedule
) const {
1179 return isStrideX(Schedule
, 0);
1182 bool MemoryAccess::isStrideOne(isl::map Schedule
) const {
1183 return isStrideX(Schedule
, 1);
1186 void MemoryAccess::setAccessRelation(isl::map NewAccess
) {
1187 AccessRelation
= NewAccess
;
1190 void MemoryAccess::setNewAccessRelation(isl::map NewAccess
) {
1194 // Check domain space compatibility.
1195 isl::space NewSpace
= NewAccess
.get_space();
1196 isl::space NewDomainSpace
= NewSpace
.domain();
1197 isl::space OriginalDomainSpace
= getStatement()->getDomainSpace();
1198 assert(OriginalDomainSpace
.has_equal_tuples(NewDomainSpace
));
1200 // Reads must be executed unconditionally. Writes might be executed in a
1203 // Check whether there is an access for every statement instance.
1204 isl::set StmtDomain
= getStatement()->getDomain();
1206 StmtDomain
.intersect_params(getStatement()->getParent()->getContext());
1207 isl::set NewDomain
= NewAccess
.domain();
1208 assert(StmtDomain
.is_subset(NewDomain
) &&
1209 "Partial READ accesses not supported");
1212 isl::space NewAccessSpace
= NewAccess
.get_space();
1213 assert(NewAccessSpace
.has_tuple_id(isl::dim::set
) &&
1214 "Must specify the array that is accessed");
1215 isl::id NewArrayId
= NewAccessSpace
.get_tuple_id(isl::dim::set
);
1216 auto *SAI
= static_cast<ScopArrayInfo
*>(NewArrayId
.get_user());
1217 assert(SAI
&& "Must set a ScopArrayInfo");
1219 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1220 InvariantEquivClassTy
*EqClass
=
1221 getStatement()->getParent()->lookupInvariantEquivClass(
1224 "Access functions to indirect arrays must have an invariant and "
1225 "hoisted base pointer");
1228 // Check whether access dimensions correspond to number of dimensions of the
1230 auto Dims
= SAI
->getNumberOfDimensions();
1231 assert(NewAccessSpace
.dim(isl::dim::set
) == Dims
&&
1232 "Access dims must match array dims");
1235 NewAccess
= NewAccess
.gist_domain(getStatement()->getDomain());
1236 NewAccessRelation
= NewAccess
;
1239 bool MemoryAccess::isLatestPartialAccess() const {
1240 isl::set StmtDom
= getStatement()->getDomain();
1241 isl::set AccDom
= getLatestAccessRelation().domain();
1243 return !StmtDom
.is_subset(AccDom
);
1246 //===----------------------------------------------------------------------===//
1248 isl::map
ScopStmt::getSchedule() const {
1249 isl::set Domain
= getDomain();
1250 if (Domain
.is_empty())
1251 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1252 auto Schedule
= getParent()->getSchedule();
1255 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1256 if (Schedule
.is_empty())
1257 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1258 isl::map M
= M
.from_union_map(Schedule
);
1260 M
= M
.gist_domain(Domain
);
1265 void ScopStmt::restrictDomain(isl::set NewDomain
) {
1266 assert(NewDomain
.is_subset(Domain
) &&
1267 "New domain is not a subset of old domain!");
1271 void ScopStmt::addAccess(MemoryAccess
*Access
, bool Prepend
) {
1272 Instruction
*AccessInst
= Access
->getAccessInstruction();
1274 if (Access
->isArrayKind()) {
1275 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1276 MAL
.emplace_front(Access
);
1277 } else if (Access
->isValueKind() && Access
->isWrite()) {
1278 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1279 assert(!ValueWrites
.lookup(AccessVal
));
1281 ValueWrites
[AccessVal
] = Access
;
1282 } else if (Access
->isValueKind() && Access
->isRead()) {
1283 Value
*AccessVal
= Access
->getAccessValue();
1284 assert(!ValueReads
.lookup(AccessVal
));
1286 ValueReads
[AccessVal
] = Access
;
1287 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1288 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1289 assert(!PHIWrites
.lookup(PHI
));
1291 PHIWrites
[PHI
] = Access
;
1292 } else if (Access
->isAnyPHIKind() && Access
->isRead()) {
1293 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1294 assert(!PHIReads
.lookup(PHI
));
1296 PHIReads
[PHI
] = Access
;
1300 MemAccs
.insert(MemAccs
.begin(), Access
);
1303 MemAccs
.push_back(Access
);
1306 void ScopStmt::realignParams() {
1307 for (MemoryAccess
*MA
: *this)
1308 MA
->realignParams();
1310 isl::set Ctx
= Parent
.getContext();
1311 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1312 Domain
= Domain
.gist_params(Ctx
);
1315 /// Add @p BSet to the set @p User if @p BSet is bounded.
1316 static isl_stat
collectBoundedParts(__isl_take isl_basic_set
*BSet
,
1318 isl_set
**BoundedParts
= static_cast<isl_set
**>(User
);
1319 if (isl_basic_set_is_bounded(BSet
))
1320 *BoundedParts
= isl_set_union(*BoundedParts
, isl_set_from_basic_set(BSet
));
1322 isl_basic_set_free(BSet
);
1326 /// Return the bounded parts of @p S.
1327 static __isl_give isl_set
*collectBoundedParts(__isl_take isl_set
*S
) {
1328 isl_set
*BoundedParts
= isl_set_empty(isl_set_get_space(S
));
1329 isl_set_foreach_basic_set(S
, collectBoundedParts
, &BoundedParts
);
1331 return BoundedParts
;
1334 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1336 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1337 /// both with regards to the dimension @p Dim.
1338 static std::pair
<__isl_give isl_set
*, __isl_give isl_set
*>
1339 partitionSetParts(__isl_take isl_set
*S
, unsigned Dim
) {
1340 for (unsigned u
= 0, e
= isl_set_n_dim(S
); u
< e
; u
++)
1341 S
= isl_set_lower_bound_si(S
, isl_dim_set
, u
, 0);
1343 unsigned NumDimsS
= isl_set_n_dim(S
);
1344 isl_set
*OnlyDimS
= isl_set_copy(S
);
1346 // Remove dimensions that are greater than Dim as they are not interesting.
1347 assert(NumDimsS
>= Dim
+ 1);
1349 isl_set_project_out(OnlyDimS
, isl_dim_set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1351 // Create artificial parametric upper bounds for dimensions smaller than Dim
1352 // as we are not interested in them.
1353 OnlyDimS
= isl_set_insert_dims(OnlyDimS
, isl_dim_param
, 0, Dim
);
1354 for (unsigned u
= 0; u
< Dim
; u
++) {
1355 isl_constraint
*C
= isl_inequality_alloc(
1356 isl_local_space_from_space(isl_set_get_space(OnlyDimS
)));
1357 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, u
, 1);
1358 C
= isl_constraint_set_coefficient_si(C
, isl_dim_set
, u
, -1);
1359 OnlyDimS
= isl_set_add_constraint(OnlyDimS
, C
);
1362 // Collect all bounded parts of OnlyDimS.
1363 isl_set
*BoundedParts
= collectBoundedParts(OnlyDimS
);
1365 // Create the dimensions greater than Dim again.
1366 BoundedParts
= isl_set_insert_dims(BoundedParts
, isl_dim_set
, Dim
+ 1,
1367 NumDimsS
- Dim
- 1);
1369 // Remove the artificial upper bound parameters again.
1370 BoundedParts
= isl_set_remove_dims(BoundedParts
, isl_dim_param
, 0, Dim
);
1372 isl_set
*UnboundedParts
= isl_set_subtract(S
, isl_set_copy(BoundedParts
));
1373 return std::make_pair(UnboundedParts
, BoundedParts
);
1376 /// Create the conditions under which @p L @p Pred @p R is true.
1377 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1378 __isl_take isl_pw_aff
*L
,
1379 __isl_take isl_pw_aff
*R
) {
1381 case ICmpInst::ICMP_EQ
:
1382 return isl_pw_aff_eq_set(L
, R
);
1383 case ICmpInst::ICMP_NE
:
1384 return isl_pw_aff_ne_set(L
, R
);
1385 case ICmpInst::ICMP_SLT
:
1386 return isl_pw_aff_lt_set(L
, R
);
1387 case ICmpInst::ICMP_SLE
:
1388 return isl_pw_aff_le_set(L
, R
);
1389 case ICmpInst::ICMP_SGT
:
1390 return isl_pw_aff_gt_set(L
, R
);
1391 case ICmpInst::ICMP_SGE
:
1392 return isl_pw_aff_ge_set(L
, R
);
1393 case ICmpInst::ICMP_ULT
:
1394 return isl_pw_aff_lt_set(L
, R
);
1395 case ICmpInst::ICMP_UGT
:
1396 return isl_pw_aff_gt_set(L
, R
);
1397 case ICmpInst::ICMP_ULE
:
1398 return isl_pw_aff_le_set(L
, R
);
1399 case ICmpInst::ICMP_UGE
:
1400 return isl_pw_aff_ge_set(L
, R
);
1402 llvm_unreachable("Non integer predicate not supported");
1406 /// Compute the isl representation for the SCEV @p E in this BB.
1408 /// @param S The Scop in which @p BB resides in.
1409 /// @param BB The BB for which isl representation is to be
1411 /// @param InvalidDomainMap A map of BB to their invalid domains.
1412 /// @param E The SCEV that should be translated.
1413 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
1415 /// Note that this function will also adjust the invalid context accordingly.
1417 __isl_give isl_pw_aff
*
1418 getPwAff(Scop
&S
, BasicBlock
*BB
,
1419 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
, const SCEV
*E
,
1420 bool NonNegative
= false) {
1421 PWACtx PWAC
= S
.getPwAff(E
, BB
, NonNegative
);
1422 InvalidDomainMap
[BB
] = InvalidDomainMap
[BB
].unite(PWAC
.second
);
1423 return PWAC
.first
.release();
1426 /// Build the conditions sets for the switch @p SI in the @p Domain.
1428 /// This will fill @p ConditionSets with the conditions under which control
1429 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1430 /// have as many elements as @p SI has successors.
1431 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, SwitchInst
*SI
, Loop
*L
,
1432 __isl_keep isl_set
*Domain
,
1433 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1434 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1435 Value
*Condition
= getConditionFromTerminator(SI
);
1436 assert(Condition
&& "No condition for switch");
1438 ScalarEvolution
&SE
= *S
.getSE();
1439 isl_pw_aff
*LHS
, *RHS
;
1440 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEVAtScope(Condition
, L
));
1442 unsigned NumSuccessors
= SI
->getNumSuccessors();
1443 ConditionSets
.resize(NumSuccessors
);
1444 for (auto &Case
: SI
->cases()) {
1445 unsigned Idx
= Case
.getSuccessorIndex();
1446 ConstantInt
*CaseValue
= Case
.getCaseValue();
1448 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEV(CaseValue
));
1449 isl_set
*CaseConditionSet
=
1450 buildConditionSet(ICmpInst::ICMP_EQ
, isl_pw_aff_copy(LHS
), RHS
);
1451 ConditionSets
[Idx
] = isl_set_coalesce(
1452 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1455 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1456 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1457 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1459 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1460 ConditionSets
[0] = isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
);
1462 isl_pw_aff_free(LHS
);
1467 /// Build condition sets for unsigned ICmpInst(s).
1468 /// Special handling is required for unsigned operands to ensure that if
1469 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1470 /// it should wrap around.
1472 /// @param IsStrictUpperBound holds information on the predicate relation
1473 /// between TestVal and UpperBound, i.e,
1474 /// TestVal < UpperBound OR TestVal <= UpperBound
1475 __isl_give isl_set
*
1476 buildUnsignedConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1477 __isl_keep isl_set
*Domain
, const SCEV
*SCEV_TestVal
,
1478 const SCEV
*SCEV_UpperBound
,
1479 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1480 bool IsStrictUpperBound
) {
1481 // Do not take NonNeg assumption on TestVal
1482 // as it might have MSB (Sign bit) set.
1483 isl_pw_aff
*TestVal
= getPwAff(S
, BB
, InvalidDomainMap
, SCEV_TestVal
, false);
1484 // Take NonNeg assumption on UpperBound.
1485 isl_pw_aff
*UpperBound
=
1486 getPwAff(S
, BB
, InvalidDomainMap
, SCEV_UpperBound
, true);
1490 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1491 isl_pw_aff_get_domain_space(TestVal
))),
1492 isl_pw_aff_copy(TestVal
));
1495 if (IsStrictUpperBound
)
1496 // TestVal < UpperBound
1497 Second
= isl_pw_aff_lt_set(TestVal
, UpperBound
);
1499 // TestVal <= UpperBound
1500 Second
= isl_pw_aff_le_set(TestVal
, UpperBound
);
1502 isl_set
*ConsequenceCondSet
= isl_set_intersect(First
, Second
);
1503 return ConsequenceCondSet
;
1506 /// Build the conditions sets for the branch condition @p Condition in
1509 /// This will fill @p ConditionSets with the conditions under which control
1510 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1511 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1512 /// context under which @p Condition is true/false will be returned as the
1513 /// new elements of @p ConditionSets.
1514 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1515 TerminatorInst
*TI
, Loop
*L
, __isl_keep isl_set
*Domain
,
1516 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1517 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1518 ScalarEvolution
&SE
= *S
.getSE();
1519 isl_set
*ConsequenceCondSet
= nullptr;
1521 if (auto Load
= dyn_cast
<LoadInst
>(Condition
)) {
1522 const SCEV
*LHSSCEV
= SE
.getSCEVAtScope(Load
, L
);
1523 const SCEV
*RHSSCEV
= SE
.getZero(LHSSCEV
->getType());
1524 bool NonNeg
= false;
1525 isl_pw_aff
*LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LHSSCEV
, NonNeg
);
1526 isl_pw_aff
*RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RHSSCEV
, NonNeg
);
1527 ConsequenceCondSet
= buildConditionSet(ICmpInst::ICMP_SLE
, LHS
, RHS
);
1528 } else if (auto *PHI
= dyn_cast
<PHINode
>(Condition
)) {
1529 auto *Unique
= dyn_cast
<ConstantInt
>(
1530 getUniqueNonErrorValue(PHI
, &S
.getRegion(), *S
.getLI(), *S
.getDT()));
1532 if (Unique
->isZero())
1533 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1535 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1536 } else if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1537 if (CCond
->isZero())
1538 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1540 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1541 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1542 auto Opcode
= BinOp
->getOpcode();
1543 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1545 bool Valid
= buildConditionSets(S
, BB
, BinOp
->getOperand(0), TI
, L
, Domain
,
1546 InvalidDomainMap
, ConditionSets
) &&
1547 buildConditionSets(S
, BB
, BinOp
->getOperand(1), TI
, L
, Domain
,
1548 InvalidDomainMap
, ConditionSets
);
1550 while (!ConditionSets
.empty())
1551 isl_set_free(ConditionSets
.pop_back_val());
1555 isl_set_free(ConditionSets
.pop_back_val());
1556 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1557 isl_set_free(ConditionSets
.pop_back_val());
1558 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1560 if (Opcode
== Instruction::And
)
1561 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1563 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1565 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1567 "Condition of exiting branch was neither constant nor ICmp!");
1569 LoopInfo
&LI
= *S
.getLI();
1570 DominatorTree
&DT
= *S
.getDT();
1571 Region
&R
= S
.getRegion();
1573 isl_pw_aff
*LHS
, *RHS
;
1574 // For unsigned comparisons we assumed the signed bit of neither operand
1575 // to be set. The comparison is equal to a signed comparison under this
1577 bool NonNeg
= ICond
->isUnsigned();
1578 const SCEV
*LeftOperand
= SE
.getSCEVAtScope(ICond
->getOperand(0), L
),
1579 *RightOperand
= SE
.getSCEVAtScope(ICond
->getOperand(1), L
);
1581 LeftOperand
= tryForwardThroughPHI(LeftOperand
, R
, SE
, LI
, DT
);
1582 RightOperand
= tryForwardThroughPHI(RightOperand
, R
, SE
, LI
, DT
);
1584 switch (ICond
->getPredicate()) {
1585 case ICmpInst::ICMP_ULT
:
1586 ConsequenceCondSet
=
1587 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1588 RightOperand
, InvalidDomainMap
, true);
1590 case ICmpInst::ICMP_ULE
:
1591 ConsequenceCondSet
=
1592 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1593 RightOperand
, InvalidDomainMap
, false);
1595 case ICmpInst::ICMP_UGT
:
1596 ConsequenceCondSet
=
1597 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1598 LeftOperand
, InvalidDomainMap
, true);
1600 case ICmpInst::ICMP_UGE
:
1601 ConsequenceCondSet
=
1602 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1603 LeftOperand
, InvalidDomainMap
, false);
1606 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LeftOperand
, NonNeg
);
1607 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RightOperand
, NonNeg
);
1608 ConsequenceCondSet
= buildConditionSet(ICond
->getPredicate(), LHS
, RHS
);
1613 // If no terminator was given we are only looking for parameter constraints
1614 // under which @p Condition is true/false.
1616 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1617 assert(ConsequenceCondSet
);
1618 ConsequenceCondSet
= isl_set_coalesce(
1619 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1621 isl_set
*AlternativeCondSet
= nullptr;
1623 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1626 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1627 isl_set_copy(ConsequenceCondSet
));
1629 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1633 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc(),
1634 TI
? TI
->getParent() : nullptr /* BasicBlock */);
1635 isl_set_free(AlternativeCondSet
);
1636 isl_set_free(ConsequenceCondSet
);
1640 ConditionSets
.push_back(ConsequenceCondSet
);
1641 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1646 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1648 /// This will fill @p ConditionSets with the conditions under which control
1649 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1650 /// have as many elements as @p TI has successors.
1651 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, TerminatorInst
*TI
, Loop
*L
,
1652 __isl_keep isl_set
*Domain
,
1653 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1654 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1655 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1656 return buildConditionSets(S
, BB
, SI
, L
, Domain
, InvalidDomainMap
,
1659 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1661 if (TI
->getNumSuccessors() == 1) {
1662 ConditionSets
.push_back(isl_set_copy(Domain
));
1666 Value
*Condition
= getConditionFromTerminator(TI
);
1667 assert(Condition
&& "No condition for Terminator");
1669 return buildConditionSets(S
, BB
, Condition
, TI
, L
, Domain
, InvalidDomainMap
,
// Construct a region statement covering @p R; @p EntryBlockInstructions are
// the modeled instructions of the region's entry block.
ScopStmt::ScopStmt(Scop &parent, Region &R, StringRef Name,
                   Loop *SurroundingLoop,
                   std::vector<Instruction *> EntryBlockInstructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
      Instructions(EntryBlockInstructions) {}
// Construct a basic-block statement for @p bb with the given modeled
// instruction list.
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, StringRef Name,
                   Loop *SurroundingLoop,
                   std::vector<Instruction *> Instructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
      Instructions(Instructions) {}
1687 ScopStmt::ScopStmt(Scop
&parent
, isl::map SourceRel
, isl::map TargetRel
,
1689 : Parent(parent
), InvalidDomain(nullptr), Domain(NewDomain
),
1691 BaseName
= getIslCompatibleName("CopyStmt_", "",
1692 std::to_string(parent
.getCopyStmtsNum()));
1693 isl::id Id
= isl::id::alloc(getIslCtx(), getBaseName(), this);
1694 Domain
= Domain
.set_tuple_id(Id
);
1695 TargetRel
= TargetRel
.set_tuple_id(isl::dim::in
, Id
);
1697 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE
, TargetRel
);
1698 parent
.addAccessFunction(Access
);
1700 SourceRel
= SourceRel
.set_tuple_id(isl::dim::in
, Id
);
1701 Access
= new MemoryAccess(this, MemoryAccess::AccessType::READ
, SourceRel
);
1702 parent
.addAccessFunction(Access
);
// All members clean themselves up; the defaulted destructor suffices.
ScopStmt::~ScopStmt() = default;
// Textual (isl) representation of the statement domain.
std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }
1710 std::string
ScopStmt::getScheduleStr() const {
1711 auto *S
= getSchedule().release();
1714 auto Str
= stringFromIslObj(S
);
// Replace the set of iterations for which this statement is not modeled
// correctly.
void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }
1721 BasicBlock
*ScopStmt::getEntryBlock() const {
1723 return getBasicBlock();
1724 return getRegion()->getEntry();
// Number of loop dimensions surrounding this statement.
unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }
// C-string name of this statement.
const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }
1731 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1732 return NestLoops
[Dimension
];
// The isl context shared with the parent Scop.
isl::ctx ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
// The statement's iteration domain.
isl::set ScopStmt::getDomain() const { return Domain; }
// The space of the statement's iteration domain.
isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }
// The tuple id of the statement's iteration domain.
isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
1743 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1744 OS
<< "Instructions {\n";
1746 for (Instruction
*Inst
: Instructions
)
1747 OS
.indent(16) << *Inst
<< "\n";
1749 OS
.indent(12) << "}\n";
1752 void ScopStmt::print(raw_ostream
&OS
, bool PrintInstructions
) const {
1753 OS
<< "\t" << getBaseName() << "\n";
1754 OS
.indent(12) << "Domain :=\n";
1757 OS
.indent(16) << getDomainStr() << ";\n";
1759 OS
.indent(16) << "n/a\n";
1761 OS
.indent(12) << "Schedule :=\n";
1764 OS
.indent(16) << getScheduleStr() << ";\n";
1766 OS
.indent(16) << "n/a\n";
1768 for (MemoryAccess
*Access
: MemAccs
)
1771 if (PrintInstructions
)
1772 printInstructions(OS
.indent(12));
1775 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper: print this statement (including instructions) to dbgs().
LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
1779 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
1780 if (MA
->isRead() && MA
->isOriginalValueKind()) {
1781 bool Found
= ValueReads
.erase(MA
->getAccessValue());
1783 assert(Found
&& "Expected access data not found");
1785 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
1786 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
1788 assert(Found
&& "Expected access data not found");
1790 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
1791 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1793 assert(Found
&& "Expected access data not found");
1795 if (MA
->isRead() && MA
->isOriginalAnyPHIKind()) {
1796 bool Found
= PHIReads
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1798 assert(Found
&& "Expected access data not found");
1802 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
1803 // Remove the memory accesses from this statement together with all scalar
1804 // accesses that were caused by it. MemoryKind::Value READs have no access
1805 // instruction, hence would not be removed by this function. However, it is
1806 // only used for invariant LoadInst accesses, its arguments are always affine,
1807 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1808 // accesses to be removed.
1809 auto Predicate
= [&](MemoryAccess
*Acc
) {
1810 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
1812 for (auto *MA
: MemAccs
) {
1813 if (Predicate(MA
)) {
1814 removeAccessData(MA
);
1815 Parent
.removeAccessData(MA
);
1818 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
1820 InstructionToAccess
.erase(MA
->getAccessInstruction());
1823 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
, bool AfterHoisting
) {
1824 if (AfterHoisting
) {
1825 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
1826 assert(MAIt
!= MemAccs
.end());
1827 MemAccs
.erase(MAIt
);
1829 removeAccessData(MA
);
1830 Parent
.removeAccessData(MA
);
1833 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
1834 if (It
!= InstructionToAccess
.end()) {
1835 It
->second
.remove(MA
);
1836 if (It
->second
.empty())
1837 InstructionToAccess
.erase(MA
->getAccessInstruction());
// Return an input (READ) access for value @p V in this statement, creating a
// MemoryKind::Value read access on demand.
// NOTE(review): the early `return Access;` for an existing access, the call
// registering the new access with this statement, and the final return are
// elided in this extract — confirm against the full file.
1841 MemoryAccess
*ScopStmt::ensureValueRead(Value
*V
) {
1842 MemoryAccess
*Access
= lookupInputAccessOf(V
);
1846 ScopArrayInfo
*SAI
=
1847 Parent
.getOrCreateScopArrayInfo(V
, V
->getType(), {}, MemoryKind::Value
);
1848 Access
= new MemoryAccess(this, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1849 true, {}, {}, V
, MemoryKind::Value
);
1850 Parent
.addAccessFunction(Access
);
1851 Access
->buildAccessRelation(SAI
);
1853 Parent
.addAccessData(Access
);
1857 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const ScopStmt
&S
) {
1858 S
.print(OS
, PollyPrintInstructions
);
1862 //===----------------------------------------------------------------------===//
1863 /// Scop class implementation.
1865 void Scop::setContext(isl::set NewContext
) {
1866 Context
= NewContext
.align_params(Context
.get_space());
// SCEV visitor that substitutes values via @p VMap while keeping AddRec
// expressions anchored to their original loop: an AddRec's start is rewritten
// separately and re-added so invariant-load equivalence classes stay valid.
// NOTE(review): access specifiers, the closing braces of the struct, and the
// fall-through `return E;` of visitUnknown are elided in this extract —
// confirm against the full file.
1871 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1872 struct SCEVSensitiveParameterRewriter
1873 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
1874 const ValueToValueMap
&VMap
;
1877 SCEVSensitiveParameterRewriter(const ValueToValueMap
&VMap
,
1878 ScalarEvolution
&SE
)
1879 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
1881 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
1882 const ValueToValueMap
&VMap
) {
1883 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
1884 return SSPR
.visit(E
);
1887 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
1888 auto *Start
= visit(E
->getStart());
1889 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
1890 visit(E
->getStepRecurrence(SE
)),
1891 E
->getLoop(), SCEV::FlagAnyWrap
);
1892 return SE
.getAddExpr(Start
, AddRec
);
1895 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
1896 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
1897 return SE
.getUnknown(NewValue
);
// SCEV traversal that decides whether an expression depends on values defined
// inside the scop (AddRecs over contained loops, or instructions contained in
// the region that are not remapped by @p VMap). Used to veto rewriting.
// NOTE(review): the `const Scop *S;` member, the `const Scop *S` constructor
// parameter, the visitAll call inside hasVariant, and closing braces are
// elided in this extract — confirm against the full file.
1902 /// Check whether we should remap a SCEV expression.
1903 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
1904 const ValueToValueMap
&VMap
;
1905 bool FoundInside
= false;
1909 SCEVFindInsideScop(const ValueToValueMap
&VMap
, ScalarEvolution
&SE
,
1911 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
1913 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
1914 const ValueToValueMap
&VMap
, const Scop
*S
) {
1915 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
1917 return SFIS
.FoundInside
;
1920 bool follow(const SCEV
*E
) {
1921 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
1922 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
1923 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
1924 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
1925 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
1927 return !FoundInside
;
1930 bool isDone() { return FoundInside
; }
1932 } // end anonymous namespace
// Rewrite @p E so equivalent invariant loads are represented by a single
// class representative; bail out (returning E unchanged) when the expression
// depends on scop-internal values, since the rewrite would be invalid then.
// NOTE(review): the `return E;` taken when hasVariant() is true is elided in
// this extract — confirm against the full file.
1934 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) const {
1935 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
1936 // doesn't like addition between an AddRec and an expression that
1937 // doesn't have a dominance relationship with it.)
1938 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
1942 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
// Mangled-name -> friendly-name table for well-known OpenCL work-item query
// functions, used to give parameters readable names in the Polly output.
// NOTE(review): the closing brace/semicolon of the initializer is elided in
// this extract.
1945 // This table of function names is used to translate parameter names in more
1946 // human-readable names. This makes it easier to interpret Polly analysis
1948 StringMap
<std::string
> KnownNames
= {
1949 {"_Z13get_global_idj", "global_id"},
1950 {"_Z12get_local_idj", "local_id"},
1951 {"_Z15get_global_sizej", "global_size"},
1952 {"_Z14get_local_sizej", "local_size"},
1953 {"_Z12get_work_dimv", "work_dim"},
1954 {"_Z17get_global_offsetj", "global_offset"},
1955 {"_Z12get_group_idj", "group_id"},
1956 {"_Z14get_num_groupsj", "num_groups"},
// Build a readable parameter name for a call to a known constant function:
// the (possibly translated) callee name followed by "_<constant>" for each
// argument. Callers are expected to pass calls with only constant-int args.
// NOTE(review): the declaration of `Result` and the final `return OS.str();`
// are elided in this extract — confirm against the full file.
1959 static std::string
getCallParamName(CallInst
*Call
) {
1961 raw_string_ostream
OS(Result
);
1962 std::string Name
= Call
->getCalledFunction()->getName();
1964 auto Iterator
= KnownNames
.find(Name
);
1965 if (Iterator
!= KnownNames
.end())
1966 Name
= "__" + Iterator
->getValue();
1968 for (auto &Operand
: Call
->arg_operands()) {
1969 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
1970 OS
<< "_" << Op
->getValue();
// Create and cache an isl::id for @p Parameter. The name defaults to
// "p_<index>", but known constant calls and named values/loads produce more
// descriptive names; the final name is made isl-compatible. The parameter
// pointer itself is stored as the id's user pointer.
// NOTE(review): several branch lines (e.g. the hasName() guard around the
// Val->getName() assignment and closing braces) are elided in this extract —
// confirm against the full file.
1976 void Scop::createParameterId(const SCEV
*Parameter
) {
1977 assert(Parameters
.count(Parameter
));
1978 assert(!ParameterIds
.count(Parameter
));
1980 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
1982 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
1983 Value
*Val
= ValueParameter
->getValue();
1984 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
1986 if (Call
&& isConstCall(Call
)) {
1987 ParameterName
= getCallParamName(Call
);
1988 } else if (UseInstructionNames
) {
1989 // If this parameter references a specific Value and this value has a name
1990 // we use this name as it is likely to be unique and more useful than just
1993 ParameterName
= Val
->getName();
1994 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
1995 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
1996 if (LoadOrigin
->hasName()) {
1997 ParameterName
+= "_loaded_from_";
1999 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
2004 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2007 isl::id Id
= isl::id::alloc(getIslCtx(), ParameterName
,
2008 const_cast<void *>((const void *)Parameter
));
2009 ParameterIds
[Parameter
] = Id
;
2012 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2013 for (const SCEV
*Parameter
: NewParameters
) {
2014 // Normalize the SCEV to get the representing element for an invariant load.
2015 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2016 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2018 if (Parameters
.insert(Parameter
))
2019 createParameterId(Parameter
);
2023 isl::id
Scop::getIdForParam(const SCEV
*Parameter
) const {
2024 // Normalize the SCEV to get the representing element for an invariant load.
2025 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2026 return ParameterIds
.lookup(Parameter
);
2029 isl::set
Scop::addNonEmptyDomainConstraints(isl::set C
) const {
2030 isl_set
*DomainContext
= isl_union_set_params(getDomains().release());
2031 return isl::manage(isl_set_intersect_params(C
.release(), DomainContext
));
2034 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2035 return DT
.dominates(BB
, getEntry());
// Incorporate llvm.assume conditions into the scop context: each affine
// assumption that holds inside (or dominates) the scop is converted into a
// condition set and intersected with the Context; parameters introduced only
// by the assumption are projected out again.
// NOTE(review): many control-flow lines (continues, the emission of the
// ignored-assumption remark wrapper, the branch selecting the two
// ConditionSets cases, isl_id frees, and closing braces) are elided in this
// extract — confirm against the full file before editing.
2038 void Scop::addUserAssumptions(
2039 AssumptionCache
&AC
, DominatorTree
&DT
, LoopInfo
&LI
,
2040 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2041 for (auto &Assumption
: AC
.assumptions()) {
2042 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2043 if (!CI
|| CI
->getNumArgOperands() != 1)
2046 bool InScop
= contains(CI
);
2047 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2050 auto *L
= LI
.getLoopFor(CI
->getParent());
2051 auto *Val
= CI
->getArgOperand(0);
2052 ParameterSetTy DetectedParams
;
2053 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2055 OptimizationRemarkAnalysis(DEBUG_TYPE
, "IgnoreUserAssumption", CI
)
2056 << "Non-affine user assumption ignored.");
2060 // Collect all newly introduced parameters.
2061 ParameterSetTy NewParams
;
2062 for (auto *Param
: DetectedParams
) {
2063 Param
= extractConstantFactor(Param
, *SE
).second
;
2064 Param
= getRepresentingInvariantLoadSCEV(Param
);
2065 if (Parameters
.count(Param
))
2067 NewParams
.insert(Param
);
2070 SmallVector
<isl_set
*, 2> ConditionSets
;
2071 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2072 BasicBlock
*BB
= InScop
? CI
->getParent() : getRegion().getEntry();
2073 auto *Dom
= InScop
? DomainMap
[BB
].copy() : Context
.copy();
2074 assert(Dom
&& "Cannot propagate a nullptr.");
2075 bool Valid
= buildConditionSets(*this, BB
, Val
, TI
, L
, Dom
,
2076 InvalidDomainMap
, ConditionSets
);
2082 isl_set
*AssumptionCtx
= nullptr;
2084 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2085 isl_set_free(ConditionSets
[0]);
2087 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2088 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2091 // Project out newly introduced parameters as they are not otherwise useful.
2092 if (!NewParams
.empty()) {
2093 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2094 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2095 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2098 if (!NewParams
.count(Param
))
2102 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2105 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "UserAssumption", CI
)
2106 << "Use user assumption: " << stringFromIslObj(AssumptionCtx
));
2107 Context
= Context
.intersect(isl::manage(AssumptionCtx
));
// Intersect the scop context with a user-provided context (-polly-context).
// The user context must match the computed context in parameter count and
// dimension names; otherwise it is rejected with an error message and
// ignored. On success, dimension ids are copied over before intersecting.
// NOTE(review): early returns after the error paths, some frees, and closing
// braces are elided in this extract — confirm against the full file.
2111 void Scop::addUserContext() {
2112 if (UserContextStr
.empty())
2115 isl_set
*UserContext
=
2116 isl_set_read_from_str(getIslCtx().get(), UserContextStr
.c_str());
2117 isl_space
*Space
= getParamSpace().release();
2118 if (isl_space_dim(Space
, isl_dim_param
) !=
2119 isl_set_dim(UserContext
, isl_dim_param
)) {
2120 auto SpaceStr
= isl_space_to_str(Space
);
2121 errs() << "Error: the context provided in -polly-context has not the same "
2122 << "number of dimensions than the computed context. Due to this "
2123 << "mismatch, the -polly-context option is ignored. Please provide "
2124 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2126 isl_set_free(UserContext
);
2127 isl_space_free(Space
);
2131 for (unsigned i
= 0; i
< isl_space_dim(Space
, isl_dim_param
); i
++) {
2132 std::string NameContext
= Context
.get_dim_name(isl::dim::param
, i
);
2133 std::string NameUserContext
=
2134 isl_set_get_dim_name(UserContext
, isl_dim_param
, i
);
2136 if (NameContext
!= NameUserContext
) {
2137 auto SpaceStr
= isl_space_to_str(Space
);
2138 errs() << "Error: the name of dimension " << i
2139 << " provided in -polly-context "
2140 << "is '" << NameUserContext
<< "', but the name in the computed "
2141 << "context is '" << NameContext
2142 << "'. Due to this name mismatch, "
2143 << "the -polly-context option is ignored. Please provide "
2144 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2146 isl_set_free(UserContext
);
2147 isl_space_free(Space
);
2152 isl_set_set_dim_id(UserContext
, isl_dim_param
, i
,
2153 isl_space_get_dim_id(Space
, isl_dim_param
, i
));
2156 Context
= Context
.intersect(isl::manage(UserContext
));
2157 isl_space_free(Space
);
// Group required invariant loads into equivalence classes keyed by (pointer
// SCEV, loaded type). The first load of a class becomes the representative;
// later loads are mapped to it via InvEquivClassVMap, and one
// InvariantEquivClassTy record is created per class.
// NOTE(review): the lines that detect an already-existing representative and
// `continue`, plus the assignment making the first load the representative,
// are elided in this extract — confirm against the full file.
2160 void Scop::buildInvariantEquivalenceClasses() {
2161 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2163 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2164 for (LoadInst
*LInst
: RIL
) {
2165 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2167 Type
*Ty
= LInst
->getType();
2168 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2170 InvEquivClassVMap
[LInst
] = ClassRep
;
2175 InvariantEquivClasses
.emplace_back(
2176 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2180 void Scop::buildContext() {
2181 isl::space Space
= isl::space::params_alloc(getIslCtx(), 0);
2182 Context
= isl::set::universe(Space
);
2183 InvalidContext
= isl::set::empty(Space
);
2184 AssumedContext
= isl::set::universe(Space
);
// Constrain every parameter dimension of the Context by the signed range
// ScalarEvolution derives for the corresponding SCEV parameter.
// NOTE(review): the declaration/initialization of the running dimension
// counter PDim (presumably `unsigned PDim = 0;`) is elided in this extract —
// confirm against the full file.
2187 void Scop::addParameterBounds() {
2189 for (auto *Parameter
: Parameters
) {
2190 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2191 Context
= addRangeBoundsToSet(Context
, SRange
, PDim
++, isl::dim::param
);
// Collect the parameter ids describing the outermost dimension size of each
// (Fortran-style) array in @p Arrays that carries a pw_aff size expression.
// NOTE(review): the guard checking that PwAff is valid and the declaration of
// `Id` preceding the isl_pw_aff_get_dim_id call are elided in this extract —
// confirm against the full file.
2195 static std::vector
<isl::id
> getFortranArrayIds(Scop::array_range Arrays
) {
2196 std::vector
<isl::id
> OutermostSizeIds
;
2197 for (auto Array
: Arrays
) {
2198 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2199 // for its outermost dimension. Fortran arrays will have this since the
2200 // outermost dimension size can be picked up from their runtime description.
2201 // TODO: actually need to check if it has a FAD, but for now this works.
2202 if (Array
->getNumberOfDimensions() > 0) {
2203 isl::pw_aff PwAff
= Array
->getDimensionSizePw(0);
2208 isl::manage(isl_pw_aff_get_dim_id(PwAff
.get(), isl_dim_param
, 0));
2209 assert(!Id
.is_null() &&
2210 "Invalid Id for PwAff expression in Fortran array");
2212 OutermostSizeIds
.push_back(Id
);
2215 return OutermostSizeIds
;
2218 // The FORTRAN array size parameters are known to be non-negative.
2219 static isl::set
boundFortranArrayParams(isl::set Context
,
2220 Scop::array_range Arrays
) {
2221 std::vector
<isl::id
> OutermostSizeIds
;
2222 OutermostSizeIds
= getFortranArrayIds(Arrays
);
2224 for (isl::id Id
: OutermostSizeIds
) {
2225 int dim
= Context
.find_dim_by_id(isl::dim::param
, Id
);
2226 Context
= Context
.lower_bound_si(isl::dim::param
, dim
, 0);
// Bring all isl objects of the scop into one common parameter space, bound
// the parameters, realign each statement, and simplify the schedule against
// the resulting context.
// NOTE(review): the statement guarded by PollyIgnoreParamBounds (presumably
// an early `return;`) and the closing brace are elided in this extract —
// confirm against the full file.
2232 void Scop::realignParams() {
2233 if (PollyIgnoreParamBounds
)
2236 // Add all parameters into a common model.
2237 isl::space Space
= getFullParamSpace();
2239 // Align the parameters of all data structures to the model.
2240 Context
= Context
.align_params(Space
);
2242 // Bound the size of the fortran array dimensions.
2243 Context
= boundFortranArrayParams(Context
, arrays());
2245 // As all parameters are known add bounds to them.
2246 addParameterBounds();
2248 for (ScopStmt
&Stmt
: *this)
2249 Stmt
.realignParams();
2250 // Simplify the schedule according to the context too.
2251 Schedule
= Schedule
.gist_domain_params(getContext());
// Simplify @p AssumptionContext relative to the scop: gist it against the
// parameter constraints of the domains (only when no error block forced
// assumptions into the domains) and against the scop context.
// NOTE(review): the second parameter of the signature (`const Scop &S) {`)
// and the closing brace are elided in this extract — confirm against the
// full file.
2254 static isl::set
simplifyAssumptionContext(isl::set AssumptionContext
,
2256 // If we have modeled all blocks in the SCoP that have side effects we can
2257 // simplify the context with the constraints that are needed for anything to
2258 // be executed at all. However, if we have error blocks in the SCoP we already
2259 // assumed some parameter combinations cannot occur and removed them from the
2260 // domains, thus we cannot use the remaining domain to simplify the
2262 if (!S
.hasErrorBlock()) {
2263 auto DomainParameters
= S
.getDomains().params();
2264 AssumptionContext
= AssumptionContext
.gist_params(DomainParameters
);
2267 AssumptionContext
= AssumptionContext
.gist_params(S
.getContext());
2268 return AssumptionContext
;
// Simplify the assumed context under the assumption that at least one
// statement instance executes, and realign the invalid context's parameters.
2271 void Scop::simplifyContexts() {
2272 // The parameter constraints of the iteration domains give us a set of
2273 // constraints that need to hold for all cases where at least a single
2274 // statement iteration is executed in the whole scop. We now simplify the
2275 // assumed context under the assumption that such constraints hold and at
2276 // least a single statement iteration is executed. For cases where no
2277 // statement instances are executed, the assumptions we have taken about
2278 // the executed code do not matter and can be changed.
2280 // WARNING: This only holds if the assumptions we have taken do not reduce
2281 //          the set of statement instances that are executed. Otherwise we
2282 //          may run into a case where the iteration domains suggest that
2283 //          for a certain set of parameter constraints no code is executed,
2284 //          but in the original program some computation would have been
2285 //          performed. In such a case, modifying the run-time conditions and
2286 //          possibly influencing the run-time check may cause certain scops
2287 //          to not be executed.
2291 //   When delinearizing the following code:
2293 //     for (long i = 0; i < 100; i++)
2294 //       for (long j = 0; j < m; j++)
2297 //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2298 //   otherwise we would access out of bound data. Now, knowing that code is
2299 //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
2300 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2301 InvalidContext
= InvalidContext
.align_params(getParamSpace());
// Compute the lexicographic minimum/maximum of the accessed locations in
// @p Set and append the (min, max+1-in-last-dim) pair to @p MinMaxAccesses.
// Bails out (isl::stat::error) when too many parameters are involved, since
// lexmin/lexmax computation would be too expensive for the run-time check.
// NOTE(review): several lines are elided in this extract — the return type of
// the function, isl max-operations guards around the lexmin/lexmax calls, the
// declarations of `Pos` and `OneAff`, and the increment of InvolvedParams —
// confirm against the full file before editing.
2304 /// Add the minimal/maximal access in @p Set to @p User.
2306 buildMinMaxAccess(isl::set Set
, Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2307 isl::pw_multi_aff MinPMA
, MaxPMA
;
2308 isl::pw_aff LastDimAff
;
2312 Set
= Set
.remove_divs();
2313 polly::simplify(Set
);
2315 if (Set
.n_basic_set() > RunTimeChecksMaxAccessDisjuncts
)
2316 Set
= Set
.simple_hull();
2318 // Restrict the number of parameters involved in the access as the lexmin/
2319 // lexmax computation will take too long if this number is high.
2321 // Experiments with a simple test case using an i7 4800MQ:
2323 //  #Parameters involved | Time (in sec)
2332 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2333 unsigned InvolvedParams
= 0;
2334 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2335 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2338 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2339 return isl::stat::error
;
2342 MinPMA
= Set
.lexmin_pw_multi_aff();
2343 MaxPMA
= Set
.lexmax_pw_multi_aff();
2345 MinPMA
= MinPMA
.coalesce();
2346 MaxPMA
= MaxPMA
.coalesce();
2348 // Adjust the last dimension of the maximal access by one as we want to
2349 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2350 // we test during code generation might now point after the end of the
2351 // allocated array but we will never dereference it anyway.
2352 assert((!MaxPMA
|| MaxPMA
.dim(isl::dim::out
)) &&
2353 "Assumed at least one output dimension");
2355 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2356 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2357 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2358 OneAff
= OneAff
.add_constant_si(1);
2359 LastDimAff
= LastDimAff
.add(OneAff
);
2360 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2362 if (!MinPMA
|| !MaxPMA
)
2363 return isl::stat::error
;
2365 MinMaxAccesses
.push_back(std::make_pair(MinPMA
, MaxPMA
));
2367 return isl::stat::ok
;
2370 static __isl_give isl_set
*getAccessDomain(MemoryAccess
*MA
) {
2371 isl_set
*Domain
= MA
->getStatement()->getDomain().release();
2372 Domain
= isl_set_project_out(Domain
, isl_dim_set
, 0, isl_set_n_dim(Domain
));
2373 return isl_set_reset_tuple_id(Domain
);
2376 /// Wrapper function to calculate minimal/maximal accesses to each array.
2377 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2378 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2379 MinMaxAccesses
.reserve(AliasGroup
.size());
2381 isl::union_set Domains
= S
.getDomains();
2382 isl::union_map Accesses
= isl::union_map::empty(S
.getParamSpace());
2384 for (MemoryAccess
*MA
: AliasGroup
)
2385 Accesses
= Accesses
.add_map(MA
->getAccessRelation());
2387 Accesses
= Accesses
.intersect_domain(Domains
);
2388 isl::union_set Locations
= Accesses
.range();
2390 auto Lambda
= [&MinMaxAccesses
, &S
](isl::set Set
) -> isl::stat
{
2391 return buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2393 return Locations
.foreach_set(Lambda
) == isl::stat::ok
;
2396 /// Helper to treat non-affine regions and basic blocks the same.
2400 /// Return the block that is the representing block for @p RN.
2401 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2402 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2403 : RN
->getNodeAs
<BasicBlock
>();
// Return the @p idx'th successor block executed after @p RN: the region exit
// for sub-region nodes, the terminator's idx'th successor otherwise.
// NOTE(review): line 2410 is elided — presumably an `assert(!idx);` for the
// sub-region case, where only one successor exists; confirm against the full
// file.
2406 /// Return the @p idx'th block that is executed after @p RN.
2407 static inline BasicBlock
*
2408 getRegionNodeSuccessor(RegionNode
*RN
, TerminatorInst
*TI
, unsigned idx
) {
2409 if (RN
->isSubRegion()) {
2411 return RN
->getNodeAs
<Region
>()->getExit();
2413 return TI
->getSuccessor(idx
);
// Return the smallest loop surrounding @p RN. For plain blocks this is the
// block's loop (with a special case pulling unreachable-terminated blocks
// into their predecessor's loop); for non-affine sub-regions it is the first
// loop containing the region's entry that is not itself contained in the
// region.
// NOTE(review): the `return L;` statements closing both paths are elided in
// this extract — confirm against the full file.
2416 /// Return the smallest loop surrounding @p RN.
2417 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2418 if (!RN
->isSubRegion()) {
2419 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2420 Loop
*L
= LI
.getLoopFor(BB
);
2422 // Unreachable statements are not considered to belong to a LLVM loop, as
2423 // they are not part of an actual loop in the control flow graph.
2424 // Nevertheless, we handle certain unreachable statements that are common
2425 // when modeling run-time bounds checks as being part of the loop to be
2426 // able to model them and to later eliminate the run-time bounds checks.
2428 // Specifically, for basic blocks that terminate in an unreachable and
2429 // where the immediate predecessor is part of a loop, we assume these
2430 // basic blocks belong to the loop the predecessor belongs to. This
2431 // allows us to model the following code.
2433 // for (i = 0; i < N; i++) {
2435 //     abort();            <- this abort might be translated to an
2440 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2441 L
= LI
.getLoopFor(BB
->getPrevNode());
2445 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2446 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2447 while (L
&& NonAffineSubRegion
->contains(L
))
2448 L
= L
->getParentLoop();
// NOTE(review): the body of the exit-block loop (presumably incrementing
// NumBlocks for unreachable-terminated exits) and the final
// `return NumBlocks;` are elided in this extract — confirm against the full
// file.
2452 /// Get the number of blocks in @p L.
2454 /// The number of blocks in a loop are the number of basic blocks actually
2455 /// belonging to the loop, as well as all single basic blocks that the loop
2456 /// exits to and which terminate in an unreachable instruction. We do not
2457 /// allow such basic blocks in the exit of a scop, hence they belong to the
2458 /// scop and represent run-time conditions which we want to model and
2459 /// subsequently speculate away.
2461 /// @see getRegionNodeLoop for additional details.
2462 unsigned getNumBlocksInLoop(Loop
*L
) {
2463 unsigned NumBlocks
= L
->getNumBlocks();
2464 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
2465 L
->getExitBlocks(ExitBlocks
);
2467 for (auto ExitBlock
: ExitBlocks
) {
2468 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
// Number of basic blocks represented by @p RN: 1 for a plain block node, the
// block count of the region for sub-region nodes.
// NOTE(review): the `return 1;` for the non-sub-region case is elided in this
// extract — confirm against the full file.
2474 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2475 if (!RN
->isSubRegion())
2478 Region
*R
= RN
->getNodeAs
<Region
>();
2479 return std::distance(R
->block_begin(), R
->block_end());
// True if @p RN contains an error block: checks the single block for plain
// nodes, every block of the region for sub-region nodes.
// NOTE(review): the `return true;` inside the loop and the trailing
// `return false;` are elided in this extract — confirm against the full file.
2482 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2483 const DominatorTree
&DT
) {
2484 if (!RN
->isSubRegion())
2485 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2486 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2487 if (isErrorBlock(*BB
, R
, LI
, DT
))
2494 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2495 return getDomainConditions(Stmt
->getEntryBlock());
2498 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2499 auto DIt
= DomainMap
.find(BB
);
2500 if (DIt
!= DomainMap
.end())
2501 return DIt
->getSecond();
2503 auto &RI
= *R
.getRegionInfo();
2504 auto *BBR
= RI
.getRegionFor(BB
);
2505 while (BBR
->getEntry() == BB
)
2506 BBR
= BBR
->getParent();
2507 return getDomainConditions(BBR
->getEntry());
// Build the iteration domains for all blocks in @p R: seed the entry block's
// domain with a universe of the right dimensionality, then derive the rest
// from branch constraints, propagate them, and finally clear domains reached
// only through error blocks. Returns false when domain construction must be
// aborted (e.g. complexity bailout).
// NOTE(review): the loop consuming LD down to zero around line 2517-2519,
// the `return false;` statements after the two build/propagate calls, and
// the final `return true;` are elided in this extract — confirm against the
// full file.
2510 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2511 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2512 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2513 auto *EntryBB
= R
->getEntry();
2514 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2515 int LD
= getRelativeLoopDepth(L
);
2516 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD
+ 1));
2519 L
= L
->getParentLoop();
2522 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2523 DomainMap
[EntryBB
] = isl::manage(S
);
2525 if (IsOnlyNonAffineRegion
)
2526 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2528 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2531 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2534 // Error blocks and blocks dominated by them have been assumed to never be
2535 // executed. Representing them in the Scop does not add any value. In fact,
2536 // it is likely to cause issues during construction of the ScopStmts. The
2537 // contents of error blocks have not been verified to be expressible and
2538 // will cause problems when building up a ScopStmt for them.
2539 // Furthermore, basic blocks dominated by error blocks may reference
2540 // instructions in the error block which, if the error block is not modeled,
2541 // can themselves not be constructed properly. To this end we will replace
2542 // the domains of error blocks and those only reachable via error blocks
2543 // with an empty set. Additionally, we will record for each block under which
2544 // parameter combination it would be reached via an error block in its
2545 // InvalidDomain. This information is needed during load hoisting.
2546 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
// NOTE(review): the equal-loops early return (`if (OldL == NewL) return
// Dom;`), the return after the both-non-affine check, and the final
// `return Dom;` are elided in this extract — confirm against the full file.
2552 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2553 ///        to be compatible to domains constructed for loop @p NewL.
2555 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2556 /// edge from @p OldL to @p NewL.
2557 static __isl_give isl_set
*adjustDomainDimensions(Scop
&S
,
2558 __isl_take isl_set
*Dom
,
2559 Loop
*OldL
, Loop
*NewL
) {
2560 // If the loops are the same there is nothing to do.
2564 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2565 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2566 // If both loops are non-affine loops there is nothing to do.
2567 if (OldDepth
== -1 && NewDepth
== -1)
2570 // Distinguish three cases:
2571 //   1) The depth is the same but the loops are not.
2572 //      => One loop was left one was entered.
2573 //   2) The depth increased from OldL to NewL.
2574 //      => One loop was entered, none was left.
2575 //   3) The depth decreased from OldL to NewL.
2576 //      => Loops were left were difference of the depths defines how many.
2577 if (OldDepth
== NewDepth
) {
2578 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2579 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NewDepth
, 1);
2580 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2581 } else if (OldDepth
< NewDepth
) {
2582 assert(OldDepth
+ 1 == NewDepth
);
2583 auto &R
= S
.getRegion();
2585 assert(NewL
->getParentLoop() == OldL
||
2586 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2587 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2589 assert(OldDepth
> NewDepth
);
2590 int Diff
= OldDepth
- NewDepth
;
2591 int NumDim
= isl_set_n_dim(Dom
);
2592 assert(NumDim
>= Diff
);
2593 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NumDim
- Diff
, Diff
);
// Propagate the "invalid" domains (parameter valuations reached only through
// error blocks) through the region in reverse post order: error blocks turn
// their whole domain invalid (recording an ERRORBLOCK assumption), and each
// block's invalid domain is adjusted and merged into its non-dominated
// successors. Bails out with a COMPLEXITY invalidation when a successor's
// invalid domain accumulates too many disjuncts.
// NOTE(review): numerous lines are elided in this extract — `continue`
// statements after the recursion and empty-domain checks, the else of the
// IsInvalidBlock branch, domain resets, `return false;`/`return true;`, and
// closing braces — confirm against the full file before editing.
2599 bool Scop::propagateInvalidStmtDomains(
2600 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2601 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2602 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2603 for (auto *RN
: RTraversal
) {
2605 // Recurse for affine subregions but go on for basic blocks and non-affine
2607 if (RN
->isSubRegion()) {
2608 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2609 if (!isNonAffineSubRegion(SubRegion
)) {
2610 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2615 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2616 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2617 isl::set
&Domain
= DomainMap
[BB
];
2618 assert(Domain
&& "Cannot propagate a nullptr");
2620 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2622 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2624 if (!IsInvalidBlock
) {
2625 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2627 InvalidDomain
= Domain
;
2628 isl::set DomPar
= Domain
.params();
2629 recordAssumption(ERRORBLOCK
, DomPar
, BB
->getTerminator()->getDebugLoc(),
2634 if (InvalidDomain
.is_empty()) {
2635 InvalidDomainMap
[BB
] = InvalidDomain
;
2639 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2640 auto *TI
= BB
->getTerminator();
2641 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2642 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2643 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2645 // Skip successors outside the SCoP.
2646 if (!contains(SuccBB
))
2650 if (DT
.dominates(SuccBB
, BB
))
2653 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2655 auto AdjustedInvalidDomain
= isl::manage(adjustDomainDimensions(
2656 *this, InvalidDomain
.copy(), BBLoop
, SuccBBLoop
));
2658 isl::set SuccInvalidDomain
= InvalidDomainMap
[SuccBB
];
2659 SuccInvalidDomain
= SuccInvalidDomain
.unite(AdjustedInvalidDomain
);
2660 SuccInvalidDomain
= SuccInvalidDomain
.coalesce();
2661 unsigned NumConjucts
= SuccInvalidDomain
.n_basic_set();
2663 InvalidDomainMap
[SuccBB
] = SuccInvalidDomain
;
2665 // Check if the maximal number of domain disjunctions was reached.
2666 // In case this happens we will bail.
2667 if (NumConjucts
< MaxDisjunctsInDomain
)
2670 InvalidDomainMap
.erase(BB
);
2671 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2675 InvalidDomainMap
[BB
] = InvalidDomain
;
// If @p BB is the entry of a region whose exit is inside the scop, propagate
// @p BB's domain (dimension-adjusted) to that exit block, unless a loop
// backedge inside the region could prevent the exit from executing. Exits
// handled here are recorded in @p FinishedExitBlocks.
// NOTE(review): several lines are elided in this extract — the early
// `return`s, the initialization `auto *L = BBLoop;`, and closing braces —
// confirm against the full file before editing.
2681 void Scop::propagateDomainConstraintsToRegionExit(
2682 BasicBlock
*BB
, Loop
*BBLoop
,
2683 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2684 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2685 // Check if the block @p BB is the entry of a region. If so we propagate it's
2686 // domain to the exit block of the region. Otherwise we are done.
2687 auto *RI
= R
.getRegionInfo();
2688 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2689 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2690 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2693 // Do not propagate the domain if there is a loop backedge inside the region
2694 // that would prevent the exit block from being executed.
2696 while (L
&& contains(L
)) {
2697 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2698 BBLoop
->getLoopLatches(LatchBBs
);
2699 for (auto *LatchBB
: LatchBBs
)
2700 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2702 L
= L
->getParentLoop();
2705 isl::set Domain
= DomainMap
[BB
];
2706 assert(Domain
&& "Cannot propagate a nullptr");
2708 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2710 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2711 // adjust the domain before we can propagate it.
2712 isl::set AdjustedDomain
= isl::manage(
2713 adjustDomainDimensions(*this, Domain
.copy(), BBLoop
, ExitBBLoop
));
2714 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2716 // If the exit domain is not yet created we set it otherwise we "add" the
2718 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2720 // Initialize the invalid domain.
2721 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2723 FinishedExitBlocks
.insert(ExitBB
);
2726 bool Scop::buildDomainsWithBranchConstraints(
2727 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2728 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2729 // To create the domain for each block in R we iterate over all blocks and
2730 // subregions in R and propagate the conditions under which the current region
2731 // element is executed. To this end we iterate in reverse post order over R as
2732 // it ensures that we first visit all predecessors of a region node (either a
2733 // basic block or a subregion) before we visit the region node itself.
2734 // Initially, only the domain for the SCoP region entry block is set and from
2735 // there we propagate the current domain to all successors, however we add the
2736 // condition that the successor is actually executed next.
2737 // As we are only interested in non-loop carried constraints here we can
2738 // simply skip loop back edges.
2740 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2741 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2742 for (auto *RN
: RTraversal
) {
2743 // Recurse for affine subregions but go on for basic blocks and non-affine
2745 if (RN
->isSubRegion()) {
2746 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2747 if (!isNonAffineSubRegion(SubRegion
)) {
2748 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
2755 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
2756 HasErrorBlock
= true;
2758 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2759 TerminatorInst
*TI
= BB
->getTerminator();
2761 if (isa
<UnreachableInst
>(TI
))
2764 isl::set Domain
= DomainMap
.lookup(BB
);
2767 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
2769 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2770 // Propagate the domain from BB directly to blocks that have a superset
2771 // domain, at the moment only region exit nodes of regions that start in BB.
2772 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
2775 // If all successors of BB have been set a domain through the propagation
2776 // above we do not need to build condition sets but can just skip this
2777 // block. However, it is important to note that this is a local property
2778 // with regards to the region @p R. To this end FinishedExitBlocks is a
2780 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
2781 return FinishedExitBlocks
.count(SuccBB
);
2783 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
2786 // Build the condition sets for the successor nodes of the current region
2787 // node. If it is a non-affine subregion we will always execute the single
2788 // exit node, hence the single entry node domain is the condition set. For
2789 // basic blocks we use the helper function buildConditionSets.
2790 SmallVector
<isl_set
*, 8> ConditionSets
;
2791 if (RN
->isSubRegion())
2792 ConditionSets
.push_back(Domain
.copy());
2793 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
2794 InvalidDomainMap
, ConditionSets
))
2797 // Now iterate over the successors and set their initial domain based on
2798 // their condition set. We skip back edges here and have to be careful when
2799 // we leave a loop not to keep constraints over a dimension that doesn't
2801 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
2802 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
2803 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
2804 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2806 // Skip blocks outside the region.
2807 if (!contains(SuccBB
))
2810 // If we propagate the domain of some block to "SuccBB" we do not have to
2811 // adjust the domain.
2812 if (FinishedExitBlocks
.count(SuccBB
))
2816 if (DT
.dominates(SuccBB
, BB
))
2819 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2821 CondSet
= isl::manage(
2822 adjustDomainDimensions(*this, CondSet
.copy(), BBLoop
, SuccBBLoop
));
2824 // Set the domain for the successor or merge it with an existing domain in
2825 // case there are multiple paths (without loop back edges) to the
2827 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
2830 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
2832 // Initialize the invalid domain.
2833 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
2834 SuccDomain
= CondSet
;
2837 SuccDomain
= SuccDomain
.detect_equalities();
2839 // Check if the maximal number of domain disjunctions was reached.
2840 // In case this happens we will clean up and bail.
2841 if (SuccDomain
.n_basic_set() < MaxDisjunctsInDomain
)
2844 invalidate(COMPLEXITY
, DebugLoc());
2845 while (++u
< ConditionSets
.size())
2846 isl_set_free(ConditionSets
[u
]);
2854 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
2857 // If @p BB is the ScopEntry we are done
2858 if (R
.getEntry() == BB
)
2859 return isl::set::universe(Domain
.get_space());
2861 // The region info of this function.
2862 auto &RI
= *R
.getRegionInfo();
2864 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
2866 // A domain to collect all predecessor domains, thus all conditions under
2867 // which the block is executed. To this end we start with the empty domain.
2868 isl::set PredDom
= isl::set::empty(Domain
.get_space());
2870 // Set of regions of which the entry block domain has been propagated to BB.
2871 // all predecessors inside any of the regions can be skipped.
2872 SmallSet
<Region
*, 8> PropagatedRegions
;
2874 for (auto *PredBB
: predecessors(BB
)) {
2876 if (DT
.dominates(BB
, PredBB
))
2879 // If the predecessor is in a region we used for propagation we can skip it.
2880 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
2881 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
2886 // Check if there is a valid region we can use for propagation, thus look
2887 // for a region that contains the predecessor and has @p BB as exit block.
2888 auto *PredR
= RI
.getRegionFor(PredBB
);
2889 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
2892 // If a valid region for propagation was found use the entry of that region
2893 // for propagation, otherwise the PredBB directly.
2894 if (PredR
->getExit() == BB
) {
2895 PredBB
= PredR
->getEntry();
2896 PropagatedRegions
.insert(PredR
);
2899 auto *PredBBDom
= getDomainConditions(PredBB
).release();
2900 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
2902 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
2904 PredDom
= PredDom
.unite(isl::manage(PredBBDom
));
2910 bool Scop::propagateDomainConstraints(
2911 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2912 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2913 // Iterate over the region R and propagate the domain constrains from the
2914 // predecessors to the current node. In contrast to the
2915 // buildDomainsWithBranchConstraints function, this one will pull the domain
2916 // information from the predecessors instead of pushing it to the successors.
2917 // Additionally, we assume the domains to be already present in the domain
2918 // map here. However, we iterate again in reverse post order so we know all
2919 // predecessors have been visited before a block or non-affine subregion is
2922 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2923 for (auto *RN
: RTraversal
) {
2924 // Recurse for affine subregions but go on for basic blocks and non-affine
2926 if (RN
->isSubRegion()) {
2927 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2928 if (!isNonAffineSubRegion(SubRegion
)) {
2929 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
2935 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2936 isl::set
&Domain
= DomainMap
[BB
];
2939 // Under the union of all predecessor conditions we can reach this block.
2940 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
2941 Domain
= Domain
.intersect(PredDom
).coalesce();
2942 Domain
= Domain
.align_params(getParamSpace());
2944 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
2945 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
2946 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
2953 /// Create a map to map from a given iteration to a subsequent iteration.
2955 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
2956 /// is incremented by one and all other dimensions are equal, e.g.,
2957 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
2959 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
2960 static __isl_give isl_map
*
2961 createNextIterationMap(__isl_take isl_space
*SetSpace
, unsigned Dim
) {
2962 auto *MapSpace
= isl_space_map_from_set(SetSpace
);
2963 auto *NextIterationMap
= isl_map_universe(isl_space_copy(MapSpace
));
2964 for (unsigned u
= 0; u
< isl_map_dim(NextIterationMap
, isl_dim_in
); u
++)
2967 isl_map_equate(NextIterationMap
, isl_dim_in
, u
, isl_dim_out
, u
);
2968 auto *C
= isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace
));
2969 C
= isl_constraint_set_constant_si(C
, 1);
2970 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, Dim
, 1);
2971 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, Dim
, -1);
2972 NextIterationMap
= isl_map_add_constraint(NextIterationMap
, C
);
2973 return NextIterationMap
;
2976 bool Scop::addLoopBoundsToHeaderDomain(
2977 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2978 int LoopDepth
= getRelativeLoopDepth(L
);
2979 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
2981 BasicBlock
*HeaderBB
= L
->getHeader();
2982 assert(DomainMap
.count(HeaderBB
));
2983 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
2985 isl::map NextIterationMap
= isl::manage(
2986 createNextIterationMap(HeaderBBDom
.get_space().release(), LoopDepth
));
2988 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
2990 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
2991 L
->getLoopLatches(LatchBlocks
);
2993 for (BasicBlock
*LatchBB
: LatchBlocks
) {
2994 // If the latch is only reachable via error statements we skip it.
2995 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
2999 isl::set BackedgeCondition
= nullptr;
3001 TerminatorInst
*TI
= LatchBB
->getTerminator();
3002 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
3003 assert(BI
&& "Only branch instructions allowed in loop latches");
3005 if (BI
->isUnconditional())
3006 BackedgeCondition
= LatchBBDom
;
3008 SmallVector
<isl_set
*, 8> ConditionSets
;
3009 int idx
= BI
->getSuccessor(0) != HeaderBB
;
3010 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
3011 InvalidDomainMap
, ConditionSets
))
3014 // Free the non back edge condition set as we do not need it.
3015 isl_set_free(ConditionSets
[1 - idx
]);
3017 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
3020 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3021 assert(LatchLoopDepth
>= LoopDepth
);
3022 BackedgeCondition
= BackedgeCondition
.project_out(
3023 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3024 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
3027 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3028 for (int i
= 0; i
< LoopDepth
; i
++)
3029 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3031 isl::set UnionBackedgeConditionComplement
=
3032 UnionBackedgeCondition
.complement();
3033 UnionBackedgeConditionComplement
=
3034 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3036 UnionBackedgeConditionComplement
=
3037 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3038 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3039 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
3041 auto Parts
= partitionSetParts(HeaderBBDom
.copy(), LoopDepth
);
3042 HeaderBBDom
= isl::manage(Parts
.second
);
3044 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3045 // the bounded assumptions to the context as they are already implied by the
3047 if (Affinator
.hasNSWAddRecForLoop(L
)) {
3048 isl_set_free(Parts
.first
);
3052 isl::set UnboundedCtx
= isl::manage(Parts
.first
).params();
3053 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3054 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
3058 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3059 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3061 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3062 if (!PointerBaseInst
)
3065 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3069 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
3072 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3073 isl::union_map Writes
) {
3074 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3075 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3078 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3079 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3080 if (!isa
<LoadInst
>(BasePtrInst
))
3081 return contains(BasePtrInst
);
3086 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3087 if (!PollyUseRuntimeAliasChecks
)
3090 if (buildAliasGroups(AA
)) {
3091 // Aliasing assumptions do not go through addAssumption but we still want to
3092 // collect statistics so we do it here explicitly.
3093 if (MinMaxAliasGroups
.size())
3094 AssumptionsAliasing
++;
3098 // If a problem occurs while building the alias groups we need to delete
3099 // this SCoP and pretend it wasn't valid in the first place. To this end
3100 // we make the assumed context infeasible.
3101 invalidate(ALIASING
, DebugLoc());
3104 dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3105 << " could not be created as the number of parameters involved "
3106 "is too high. The SCoP will be "
3107 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3108 "the maximal number of parameters but be advised that the "
3109 "compile time might increase exponentially.\n\n");
3113 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3114 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3115 AliasSetTracker
AST(AA
);
3117 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3118 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3119 for (ScopStmt
&Stmt
: *this) {
3121 isl_set
*StmtDomain
= Stmt
.getDomain().release();
3122 bool StmtDomainEmpty
= isl_set_is_empty(StmtDomain
);
3123 isl_set_free(StmtDomain
);
3125 // Statements with an empty domain will never be executed.
3126 if (StmtDomainEmpty
)
3129 for (MemoryAccess
*MA
: Stmt
) {
3130 if (MA
->isScalarKind())
3133 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3134 MemAccInst
Acc(MA
->getAccessInstruction());
3135 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3136 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3138 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
3143 AliasGroupVectorTy AliasGroups
;
3144 for (AliasSet
&AS
: AST
) {
3145 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3149 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3152 AliasGroups
.push_back(std::move(AG
));
3155 return std::make_tuple(AliasGroups
, HasWriteAccess
);
3158 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3159 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3161 AliasGroupTy
&AG
= AliasGroups
[u
];
3162 AliasGroupTy::iterator AGI
= AG
.begin();
3163 isl_set
*AGDomain
= getAccessDomain(*AGI
);
3164 while (AGI
!= AG
.end()) {
3165 MemoryAccess
*MA
= *AGI
;
3166 isl_set
*MADomain
= getAccessDomain(MA
);
3167 if (isl_set_is_disjoint(AGDomain
, MADomain
)) {
3168 NewAG
.push_back(MA
);
3169 AGI
= AG
.erase(AGI
);
3170 isl_set_free(MADomain
);
3172 AGDomain
= isl_set_union(AGDomain
, MADomain
);
3176 if (NewAG
.size() > 1)
3177 AliasGroups
.push_back(std::move(NewAG
));
3178 isl_set_free(AGDomain
);
3182 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3183 // To create sound alias checks we perform the following steps:
3184 // o) We partition each group into read only and non read only accesses.
3185 // o) For each group with more than one base pointer we then compute minimal
3186 // and maximal accesses to each array of a group in read only and non
3187 // read only partitions separately.
3188 AliasGroupVectorTy AliasGroups
;
3189 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3191 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3193 splitAliasGroupsByDomain(AliasGroups
);
3195 for (AliasGroupTy
&AG
: AliasGroups
) {
3196 if (!hasFeasibleRuntimeContext())
3200 IslMaxOperationsGuard
MaxOpGuard(getIslCtx().get(), OptComputeOut
);
3201 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3205 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota
) {
3206 invalidate(COMPLEXITY
, DebugLoc());
3214 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3215 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3216 AliasGroupTy ReadOnlyAccesses
;
3217 AliasGroupTy ReadWriteAccesses
;
3218 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3219 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3221 if (AliasGroup
.size() < 2)
3224 for (MemoryAccess
*Access
: AliasGroup
) {
3225 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3226 Access
->getAccessInstruction())
3227 << "Possibly aliasing pointer, use restrict keyword.");
3228 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3229 if (HasWriteAccess
.count(Array
)) {
3230 ReadWriteArrays
.insert(Array
);
3231 ReadWriteAccesses
.push_back(Access
);
3233 ReadOnlyArrays
.insert(Array
);
3234 ReadOnlyAccesses
.push_back(Access
);
3238 // If there are no read-only pointers, and less than two read-write pointers,
3239 // no alias check is needed.
3240 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3243 // If there is no read-write pointer, no alias check is needed.
3244 if (ReadWriteArrays
.empty())
3247 // For non-affine accesses, no alias check can be generated as we cannot
3248 // compute a sufficiently tight lower and upper bound: bail out.
3249 for (MemoryAccess
*MA
: AliasGroup
) {
3250 if (!MA
->isAffine()) {
3251 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3252 MA
->getAccessInstruction()->getParent());
3257 // Ensure that for all memory accesses for which we generate alias checks,
3258 // their base pointers are available.
3259 for (MemoryAccess
*MA
: AliasGroup
) {
3260 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3261 addRequiredInvariantLoad(
3262 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3265 MinMaxAliasGroups
.emplace_back();
3266 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3267 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3268 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3273 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3278 // Bail out if the number of values we need to compare is too large.
3279 // This is important as the number of comparisons grows quadratically with
3280 // the number of values we need to compare.
3281 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3282 RunTimeChecksMaxArraysPerGroup
)
3286 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3294 /// Get the smallest loop that contains @p S but is not in @p S.
3295 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3296 // Start with the smallest loop containing the entry and expand that
3297 // loop until it contains all blocks in the region. If there is a loop
3298 // containing all blocks in the region check if it is itself contained
3299 // and if so take the parent loop as it will be the smallest containing
3300 // the region but not contained by it.
3301 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3303 bool AllContained
= true;
3304 for (auto *BB
: S
.blocks())
3305 AllContained
&= L
->contains(BB
);
3308 L
= L
->getParentLoop();
3311 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
3314 int Scop::NextScopID
= 0;
3316 std::string
Scop::CurrentFunc
;
3318 int Scop::getNextID(std::string ParentFunc
) {
3319 if (ParentFunc
!= CurrentFunc
) {
3320 CurrentFunc
= ParentFunc
;
3323 return NextScopID
++;
3326 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3327 DominatorTree
&DT
, ScopDetection::DetectionContext
&DC
,
3328 OptimizationRemarkEmitter
&ORE
)
3329 : IslCtx(isl_ctx_alloc(), isl_ctx_free
), SE(&ScalarEvolution
), DT(&DT
),
3330 R(R
), name(None
), HasSingleExitEdge(R
.getExitingBlock()), DC(DC
),
3331 ORE(ORE
), Affinator(this, LI
),
3332 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3333 if (IslOnErrorAbort
)
3334 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT
);
3338 Scop::~Scop() = default;
3340 void Scop::foldSizeConstantsToRight() {
3341 isl_union_set
*Accessed
= isl_union_map_range(getAccesses().release());
3343 for (auto Array
: arrays()) {
3344 if (Array
->getNumberOfDimensions() <= 1)
3347 isl_space
*Space
= Array
->getSpace().release();
3349 Space
= isl_space_align_params(Space
, isl_union_set_get_space(Accessed
));
3351 if (!isl_union_set_contains(Accessed
, Space
)) {
3352 isl_space_free(Space
);
3356 isl_set
*Elements
= isl_union_set_extract_set(Accessed
, Space
);
3358 isl_map
*Transform
=
3359 isl_map_universe(isl_space_map_from_set(Array
->getSpace().release()));
3361 std::vector
<int> Int
;
3363 int Dims
= isl_set_dim(Elements
, isl_dim_set
);
3364 for (int i
= 0; i
< Dims
; i
++) {
3366 isl_set_project_out(isl_set_copy(Elements
), isl_dim_set
, 0, i
);
3367 DimOnly
= isl_set_project_out(DimOnly
, isl_dim_set
, 1, Dims
- i
- 1);
3368 DimOnly
= isl_set_lower_bound_si(DimOnly
, isl_dim_set
, 0, 0);
3370 isl_basic_set
*DimHull
= isl_set_affine_hull(DimOnly
);
3372 if (i
== Dims
- 1) {
3374 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3375 isl_basic_set_free(DimHull
);
3379 if (isl_basic_set_dim(DimHull
, isl_dim_div
) == 1) {
3380 isl_aff
*Diff
= isl_basic_set_get_div(DimHull
, 0);
3381 isl_val
*Val
= isl_aff_get_denominator_val(Diff
);
3386 if (isl_val_is_int(Val
)) {
3387 auto ValAPInt
= APIntFromVal(Val
);
3388 if (ValAPInt
.isSignedIntN(32))
3389 ValInt
= ValAPInt
.getSExtValue();
3394 Int
.push_back(ValInt
);
3396 isl_constraint
*C
= isl_constraint_alloc_equality(
3397 isl_local_space_from_space(isl_map_get_space(Transform
)));
3398 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, ValInt
);
3399 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, -1);
3400 Transform
= isl_map_add_constraint(Transform
, C
);
3401 isl_basic_set_free(DimHull
);
3405 isl_basic_set
*ZeroSet
= isl_basic_set_copy(DimHull
);
3406 ZeroSet
= isl_basic_set_fix_si(ZeroSet
, isl_dim_set
, 0, 0);
3409 if (isl_basic_set_is_equal(ZeroSet
, DimHull
)) {
3413 Int
.push_back(ValInt
);
3414 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3415 isl_basic_set_free(DimHull
);
3416 isl_basic_set_free(ZeroSet
);
3419 isl_set
*MappedElements
= isl_map_domain(isl_map_copy(Transform
));
3421 if (!isl_set_is_subset(Elements
, MappedElements
)) {
3422 isl_set_free(Elements
);
3423 isl_set_free(MappedElements
);
3424 isl_map_free(Transform
);
3428 isl_set_free(MappedElements
);
3430 bool CanFold
= true;
3435 unsigned NumDims
= Array
->getNumberOfDimensions();
3436 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3437 if (Int
[0] != Int
[i
] && Int
[i
])
3441 isl_set_free(Elements
);
3442 isl_map_free(Transform
);
3446 for (auto &Access
: AccessFunctions
)
3447 if (Access
->getScopArrayInfo() == Array
)
3448 Access
->setAccessRelation(Access
->getAccessRelation().apply_range(
3449 isl::manage_copy(Transform
)));
3451 isl_map_free(Transform
);
3453 std::vector
<const SCEV
*> Sizes
;
3454 for (unsigned i
= 0; i
< NumDims
; i
++) {
3455 auto Size
= Array
->getDimensionSize(i
);
3457 if (i
== NumDims
- 1)
3458 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3459 Sizes
.push_back(Size
);
3462 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3464 isl_set_free(Elements
);
3466 isl_union_set_free(Accessed
);
3469 void Scop::markFortranArrays() {
3470 for (ScopStmt
&Stmt
: Stmts
) {
3471 for (MemoryAccess
*MemAcc
: Stmt
) {
3472 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3476 // TODO: const_cast-ing to edit
3477 ScopArrayInfo
*SAI
=
3478 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3479 assert(SAI
&& "memory access into a Fortran array does not "
3480 "have an associated ScopArrayInfo");
3481 SAI
->applyAndSetFAD(FAD
);
3486 void Scop::finalizeAccesses() {
3487 updateAccessDimensionality();
3488 foldSizeConstantsToRight();
3489 foldAccessRelations();
3490 assumeNoOutOfBounds();
3491 markFortranArrays();
3494 void Scop::updateAccessDimensionality() {
3495 // Check all array accesses for each base pointer and find a (virtual) element
3496 // size for the base pointer that divides all access functions.
3497 for (ScopStmt
&Stmt
: *this)
3498 for (MemoryAccess
*Access
: Stmt
) {
3499 if (!Access
->isArrayKind())
3501 ScopArrayInfo
*Array
=
3502 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3504 if (Array
->getNumberOfDimensions() != 1)
3506 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3507 const SCEV
*Subscript
= Access
->getSubscript(0);
3508 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3510 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3511 Array
->updateElementType(Ty
);
3514 for (auto &Stmt
: *this)
3515 for (auto &Access
: Stmt
)
3516 Access
->updateDimensionality();
3519 void Scop::foldAccessRelations() {
3520 for (auto &Stmt
: *this)
3521 for (auto &Access
: Stmt
)
3522 Access
->foldAccessRelation();
3525 void Scop::assumeNoOutOfBounds() {
3526 for (auto &Stmt
: *this)
3527 for (auto &Access
: Stmt
)
3528 Access
->assumeNoOutOfBound();
3531 void Scop::removeFromStmtMap(ScopStmt
&Stmt
) {
3532 for (Instruction
*Inst
: Stmt
.getInstructions())
3533 InstStmtMap
.erase(Inst
);
3535 if (Stmt
.isRegionStmt()) {
3536 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks()) {
3538 // Skip entry basic block, as its instructions are already deleted as
3539 // part of the statement's instruction list.
3540 if (BB
== Stmt
.getEntryBlock())
3542 for (Instruction
&Inst
: *BB
)
3543 InstStmtMap
.erase(&Inst
);
3546 auto StmtMapIt
= StmtMap
.find(Stmt
.getBasicBlock());
3547 if (StmtMapIt
!= StmtMap
.end())
3548 StmtMapIt
->second
.erase(std::remove(StmtMapIt
->second
.begin(),
3549 StmtMapIt
->second
.end(), &Stmt
),
3550 StmtMapIt
->second
.end());
3551 for (Instruction
*Inst
: Stmt
.getInstructions())
3552 InstStmtMap
.erase(Inst
);
3556 void Scop::removeStmts(std::function
<bool(ScopStmt
&)> ShouldDelete
,
3557 bool AfterHoisting
) {
3558 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3559 if (!ShouldDelete(*StmtIt
)) {
3564 // Start with removing all of the statement's accesses including erasing it
3565 // from all maps that are pointing to them.
3566 // Make a temporary copy because removing MAs invalidates the iterator.
3567 SmallVector
<MemoryAccess
*, 16> MAList(StmtIt
->begin(), StmtIt
->end());
3568 for (MemoryAccess
*MA
: MAList
)
3569 StmtIt
->removeSingleMemoryAccess(MA
, AfterHoisting
);
3571 removeFromStmtMap(*StmtIt
);
3572 StmtIt
= Stmts
.erase(StmtIt
);
3576 void Scop::removeStmtNotInDomainMap() {
3577 auto ShouldDelete
= [this](ScopStmt
&Stmt
) -> bool {
3578 return !this->DomainMap
.lookup(Stmt
.getEntryBlock());
3580 removeStmts(ShouldDelete
, false);
3583 void Scop::simplifySCoP(bool AfterHoisting
) {
3584 auto ShouldDelete
= [AfterHoisting
](ScopStmt
&Stmt
) -> bool {
3585 // Never delete statements that contain calls to debug functions.
3586 if (hasDebugCall(&Stmt
))
3589 bool RemoveStmt
= Stmt
.isEmpty();
3591 // Remove read only statements only after invariant load hoisting.
3592 if (!RemoveStmt
&& AfterHoisting
) {
3593 bool OnlyRead
= true;
3594 for (MemoryAccess
*MA
: Stmt
) {
3602 RemoveStmt
= OnlyRead
;
3607 removeStmts(ShouldDelete
, AfterHoisting
);
3610 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3611 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
3615 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3616 LInst
= cast
<LoadInst
>(Rep
);
3618 Type
*Ty
= LInst
->getType();
3619 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3620 for (auto &IAClass
: InvariantEquivClasses
) {
3621 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3624 auto &MAs
= IAClass
.InvariantAccesses
;
3625 for (auto *MA
: MAs
)
3626 if (MA
->getAccessInstruction() == Val
)
3633 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3634 for (const llvm::Argument
&Arg
: F
.args())
3635 if (&Arg
== maybeParam
)
3641 bool Scop::canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3642 bool MAInvalidCtxIsEmpty
,
3643 bool NonHoistableCtxIsEmpty
) {
3644 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3645 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3646 if (PollyAllowDereferenceOfAllFunctionParams
&&
3647 isAParameter(LInst
->getPointerOperand(), getFunction()))
3650 // TODO: We can provide more information for better but more expensive
3652 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3653 LInst
->getAlignment(), DL
))
3656 // If the location might be overwritten we do not hoist it unconditionally.
3658 // TODO: This is probably too conservative.
3659 if (!NonHoistableCtxIsEmpty
)
3662 // If a dereferenceable load is in a statement that is modeled precisely we
3664 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3667 // Even if the statement is not modeled precisely we can hoist the load if it
3668 // does not involve any parameters that might have been specialized by the
3669 // statement domain.
3670 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3671 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
3676 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3680 isl::set StmtInvalidCtx
= Stmt
.getInvalidContext();
3681 bool StmtInvalidCtxIsEmpty
= StmtInvalidCtx
.is_empty();
3683 // Get the context under which the statement is executed but remove the error
3684 // context under which this statement is reached.
3685 isl::set DomainCtx
= Stmt
.getDomain().params();
3686 DomainCtx
= DomainCtx
.subtract(StmtInvalidCtx
);
3688 if (DomainCtx
.n_basic_set() >= MaxDisjunctsInDomain
) {
3689 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3690 invalidate(COMPLEXITY
, AccInst
->getDebugLoc(), AccInst
->getParent());
3694 // Project out all parameters that relate to loads in the statement. Otherwise
3695 // we could have cyclic dependences on the constraints under which the
3696 // hoisted loads are executed and we could not determine an order in which to
3697 // pre-load them. This happens because not only lower bounds are part of the
3698 // domain but also upper bounds.
3699 for (auto &InvMA
: InvMAs
) {
3700 auto *MA
= InvMA
.MA
;
3701 Instruction
*AccInst
= MA
->getAccessInstruction();
3702 if (SE
->isSCEVable(AccInst
->getType())) {
3703 SetVector
<Value
*> Values
;
3704 for (const SCEV
*Parameter
: Parameters
) {
3706 findValues(Parameter
, *SE
, Values
);
3707 if (!Values
.count(AccInst
))
3710 if (isl::id ParamId
= getIdForParam(Parameter
)) {
3711 int Dim
= DomainCtx
.find_dim_by_id(isl::dim::param
, ParamId
);
3713 DomainCtx
= DomainCtx
.eliminate(isl::dim::param
, Dim
, 1);
3719 for (auto &InvMA
: InvMAs
) {
3720 auto *MA
= InvMA
.MA
;
3721 isl::set NHCtx
= InvMA
.NonHoistableCtx
;
3723 // Check for another invariant access that accesses the same location as
3724 // MA and if found consolidate them. Otherwise create a new equivalence
3725 // class at the end of InvariantEquivClasses.
3726 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3727 Type
*Ty
= LInst
->getType();
3728 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3730 isl::set MAInvalidCtx
= MA
->getInvalidContext();
3731 bool NonHoistableCtxIsEmpty
= NHCtx
.is_empty();
3732 bool MAInvalidCtxIsEmpty
= MAInvalidCtx
.is_empty();
3735 // Check if we know that this pointer can be speculatively accessed.
3736 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3737 NonHoistableCtxIsEmpty
)) {
3738 MACtx
= isl::set::universe(DomainCtx
.get_space());
3741 MACtx
= MACtx
.subtract(MAInvalidCtx
.unite(NHCtx
));
3742 MACtx
= MACtx
.gist_params(getContext());
3745 bool Consolidated
= false;
3746 for (auto &IAClass
: InvariantEquivClasses
) {
3747 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3750 // If the pointer and the type is equal check if the access function wrt.
3751 // to the domain is equal too. It can happen that the domain fixes
3752 // parameter values and these can be different for distinct part of the
3753 // SCoP. If this happens we cannot consolidate the loads but need to
3754 // create a new invariant load equivalence class.
3755 auto &MAs
= IAClass
.InvariantAccesses
;
3757 auto *LastMA
= MAs
.front();
3759 isl::set AR
= MA
->getAccessRelation().range();
3760 isl::set LastAR
= LastMA
->getAccessRelation().range();
3761 bool SameAR
= AR
.is_equal(LastAR
);
3767 // Add MA to the list of accesses that are in this class.
3770 Consolidated
= true;
3772 // Unify the execution context of the class and this statement.
3773 isl::set IAClassDomainCtx
= IAClass
.ExecutionContext
;
3774 if (IAClassDomainCtx
)
3775 IAClassDomainCtx
= IAClassDomainCtx
.unite(MACtx
).coalesce();
3777 IAClassDomainCtx
= MACtx
;
3778 IAClass
.ExecutionContext
= IAClassDomainCtx
;
3785 // If we did not consolidate MA, thus did not find an equivalence class
3786 // for it, we create a new one.
3787 InvariantEquivClasses
.emplace_back(
3788 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList
{MA
}, MACtx
, Ty
});
3792 /// Check if an access range is too complex.
3794 /// An access range is too complex, if it contains either many disjuncts or
3795 /// very complex expressions. As a simple heuristic, we assume if a set to
3796 /// be too complex if the sum of existentially quantified dimensions and
3797 /// set dimensions is larger than a threshold. This reliably detects both
3798 /// sets with many disjuncts as well as sets with many divisions as they
3801 /// @param AccessRange The range to check for complexity.
3803 /// @returns True if the access range is too complex.
3804 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
3805 unsigned NumTotalDims
= 0;
3807 auto CountDimensions
= [&NumTotalDims
](isl::basic_set BSet
) -> isl::stat
{
3808 NumTotalDims
+= BSet
.dim(isl::dim::div
);
3809 NumTotalDims
+= BSet
.dim(isl::dim::set
);
3810 return isl::stat::ok
;
3813 AccessRange
.foreach_basic_set(CountDimensions
);
3815 if (NumTotalDims
> MaxDimensionsInAccessRange
)
3821 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
3822 // TODO: Loads that are not loop carried, hence are in a statement with
3823 // zero iterators, are by construction invariant, though we
3824 // currently "hoist" them anyway. This is necessary because we allow
3825 // them to be treated as parameters (e.g., in conditions) and our code
3826 // generation would otherwise use the old value.
3828 auto &Stmt
= *Access
->getStatement();
3829 BasicBlock
*BB
= Stmt
.getEntryBlock();
3831 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
3832 Access
->isMemoryIntrinsic())
3835 // Skip accesses that have an invariant base pointer which is defined but
3836 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3837 // returns a pointer that is used as a base address. However, as we want
3838 // to hoist indirect pointers, we allow the base pointer to be defined in
3839 // the region if it is also a memory access. Each ScopArrayInfo object
3840 // that has a base pointer origin has a base pointer that is loaded and
3841 // that it is invariant, thus it will be hoisted too. However, if there is
3842 // no base pointer origin we check that the base pointer is defined
3843 // outside the region.
3844 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
3845 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
3848 isl::map AccessRelation
= Access
->getAccessRelation();
3849 assert(!AccessRelation
.is_empty());
3851 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
3854 AccessRelation
= AccessRelation
.intersect_domain(Stmt
.getDomain());
3855 isl::set SafeToLoad
;
3857 auto &DL
= getFunction().getParent()->getDataLayout();
3858 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
3860 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
3861 } else if (BB
!= LI
->getParent()) {
3862 // Skip accesses in non-affine subregions as they might not be executed
3863 // under the same condition as the entry of the non-affine subregion.
3866 SafeToLoad
= AccessRelation
.range();
3869 if (isAccessRangeTooComplex(AccessRelation
.range()))
3872 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
3873 isl::set WrittenCtx
= Written
.params();
3874 bool IsWritten
= !WrittenCtx
.is_empty();
3879 WrittenCtx
= WrittenCtx
.remove_divs();
3880 bool TooComplex
= WrittenCtx
.n_basic_set() >= MaxDisjunctsInDomain
;
3881 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
3884 addAssumption(INVARIANTLOAD
, WrittenCtx
, LI
->getDebugLoc(), AS_RESTRICTION
,
3889 void Scop::verifyInvariantLoads() {
3890 auto &RIL
= getRequiredInvariantLoads();
3891 for (LoadInst
*LI
: RIL
) {
3892 assert(LI
&& contains(LI
));
3893 // If there exists a statement in the scop which has a memory access for
3894 // @p LI, then mark this scop as infeasible for optimization.
3895 for (ScopStmt
&Stmt
: Stmts
)
3896 if (Stmt
.getArrayAccessOrNULLFor(LI
)) {
3897 invalidate(INVARIANTLOAD
, LI
->getDebugLoc(), LI
->getParent());
3903 void Scop::hoistInvariantLoads() {
3904 if (!PollyInvariantLoadHoisting
)
3907 isl::union_map Writes
= getWrites();
3908 for (ScopStmt
&Stmt
: *this) {
3909 InvariantAccessesTy InvariantAccesses
;
3911 for (MemoryAccess
*Access
: Stmt
)
3912 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
3913 InvariantAccesses
.push_back({Access
, NHCtx
});
3915 // Transfer the memory access from the statement to the SCoP.
3916 for (auto InvMA
: InvariantAccesses
)
3917 Stmt
.removeMemoryAccess(InvMA
.MA
);
3918 addInvariantLoads(Stmt
, InvariantAccesses
);
3922 /// Find the canonical scop array info object for a set of invariant load
3923 /// hoisted loads. The canonical array is the one that corresponds to the
3924 /// first load in the list of accesses which is used as base pointer of a
3926 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
3927 MemoryAccessList
&Accesses
) {
3928 for (MemoryAccess
*Access
: Accesses
) {
3929 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
3930 Access
->getAccessInstruction(), MemoryKind::Array
);
3932 return CanonicalArray
;
3937 /// Check if @p Array severs as base array in an invariant load.
3938 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
3939 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
3940 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
3941 if (Access2
->getScopArrayInfo() == Array
)
3946 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3947 /// with a reference to @p New.
3948 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
3949 const ScopArrayInfo
*New
) {
3950 for (ScopStmt
&Stmt
: *S
)
3951 for (MemoryAccess
*Access
: Stmt
) {
3952 if (Access
->getLatestScopArrayInfo() != Old
)
3955 isl::id Id
= New
->getBasePtrId();
3956 isl::map Map
= Access
->getAccessRelation();
3957 Map
= Map
.set_tuple_id(isl::dim::out
, Id
);
3958 Access
->setAccessRelation(Map
);
3962 void Scop::canonicalizeDynamicBasePtrs() {
3963 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
3964 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
3966 const ScopArrayInfo
*CanonicalBasePtrSAI
=
3967 findCanonicalArray(this, BasePtrAccesses
);
3969 if (!CanonicalBasePtrSAI
)
3972 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
3973 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
3974 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
3975 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
3976 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
3979 // we currently do not canonicalize arrays where some accesses are
3980 // hoisted as invariant loads. If we would, we need to update the access
3981 // function of the invariant loads as well. However, as this is not a
3982 // very common situation, we leave this for now to avoid further
3983 // complexity increases.
3984 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
3987 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
3992 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
3993 ArrayRef
<const SCEV
*> Sizes
,
3995 const char *BaseName
) {
3996 assert((BasePtr
|| BaseName
) &&
3997 "BasePtr and BaseName can not be nullptr at the same time.");
3998 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
3999 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
4000 : ScopArrayNameMap
[BaseName
];
4002 auto &DL
= getFunction().getParent()->getDataLayout();
4003 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
4004 DL
, this, BaseName
));
4005 ScopArrayInfoSet
.insert(SAI
.get());
4007 SAI
->updateElementType(ElementType
);
4008 // In case of mismatching array sizes, we bail out by setting the run-time
4009 // context to false.
4010 if (!SAI
->updateSizes(Sizes
))
4011 invalidate(DELINEARIZATION
, DebugLoc());
4016 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
4017 const std::string
&BaseName
,
4018 const std::vector
<unsigned> &Sizes
) {
4019 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
4020 std::vector
<const SCEV
*> SCEVSizes
;
4022 for (auto size
: Sizes
)
4024 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
4026 SCEVSizes
.push_back(nullptr);
4028 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
4029 MemoryKind::Array
, BaseName
.c_str());
4033 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
4035 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
4039 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
4040 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
4041 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
4045 std::string
Scop::getContextStr() const { return getContext().to_str(); }
4047 std::string
Scop::getAssumedContextStr() const {
4048 assert(AssumedContext
&& "Assumed context not yet built");
4049 return AssumedContext
.to_str();
4052 std::string
Scop::getInvalidContextStr() const {
4053 return InvalidContext
.to_str();
4056 std::string
Scop::getNameStr() const {
4057 std::string ExitName
, EntryName
;
4058 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4059 return EntryName
+ "---" + ExitName
;
4062 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4063 std::string ExitName
, EntryName
;
4064 raw_string_ostream
ExitStr(ExitName
);
4065 raw_string_ostream
EntryStr(EntryName
);
4067 R
.getEntry()->printAsOperand(EntryStr
, false);
4071 R
.getExit()->printAsOperand(ExitStr
, false);
4074 ExitName
= "FunctionExit";
4076 return std::make_pair(EntryName
, ExitName
);
4079 isl::set
Scop::getContext() const { return Context
; }
4080 isl::space
Scop::getParamSpace() const { return getContext().get_space(); }
4082 isl::space
Scop::getFullParamSpace() const {
4083 std::vector
<isl::id
> FortranIDs
;
4084 FortranIDs
= getFortranArrayIds(arrays());
4086 isl::space Space
= isl::space::params_alloc(
4087 getIslCtx(), ParameterIds
.size() + FortranIDs
.size());
4090 for (const SCEV
*Parameter
: Parameters
) {
4091 isl::id Id
= getIdForParam(Parameter
);
4092 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4095 for (isl::id Id
: FortranIDs
)
4096 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4101 isl::set
Scop::getAssumedContext() const {
4102 assert(AssumedContext
&& "Assumed context not yet built");
4103 return AssumedContext
;
4106 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4107 if (PollyProcessUnprofitable
)
4113 unsigned OptimizableStmtsOrLoops
= 0;
4114 for (auto &Stmt
: *this) {
4115 if (Stmt
.getNumIterators() == 0)
4118 bool ContainsArrayAccs
= false;
4119 bool ContainsScalarAccs
= false;
4120 for (auto *MA
: Stmt
) {
4123 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4124 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4127 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4128 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4131 return OptimizableStmtsOrLoops
> 1;
4134 bool Scop::hasFeasibleRuntimeContext() const {
4135 auto PositiveContext
= getAssumedContext();
4136 auto NegativeContext
= getInvalidContext();
4137 PositiveContext
= addNonEmptyDomainConstraints(PositiveContext
);
4138 // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
4139 if (!PositiveContext
)
4142 bool IsFeasible
= !(PositiveContext
.is_empty() ||
4143 PositiveContext
.is_subset(NegativeContext
));
4147 auto DomainContext
= getDomains().params();
4148 IsFeasible
= !DomainContext
.is_subset(NegativeContext
);
4149 IsFeasible
&= !Context
.is_subset(NegativeContext
);
4154 static std::string
toString(AssumptionKind Kind
) {
4157 return "No-aliasing";
4161 return "No-overflows";
4163 return "Signed-unsigned";
4165 return "Low complexity";
4167 return "Profitable";
4171 return "Finite loop";
4173 return "Invariant load";
4174 case DELINEARIZATION
:
4175 return "Delinearization";
4177 llvm_unreachable("Unknown AssumptionKind!");
4180 bool Scop::isEffectiveAssumption(isl::set Set
, AssumptionSign Sign
) {
4181 if (Sign
== AS_ASSUMPTION
) {
4182 if (Context
.is_subset(Set
))
4185 if (AssumedContext
.is_subset(Set
))
4188 if (Set
.is_disjoint(Context
))
4191 if (Set
.is_subset(InvalidContext
))
4197 bool Scop::trackAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4198 AssumptionSign Sign
, BasicBlock
*BB
) {
4199 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4202 // Do never emit trivial assumptions as they only clutter the output.
4203 if (!PollyRemarksMinimal
) {
4205 if (Sign
== AS_ASSUMPTION
)
4206 Univ
= isl::set::universe(Set
.get_space());
4208 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& Set
.is_empty()) ||
4209 (Sign
== AS_ASSUMPTION
&& Univ
.is_equal(Set
));
4217 AssumptionsAliasing
++;
4220 AssumptionsInbounds
++;
4223 AssumptionsWrapping
++;
4226 AssumptionsUnsigned
++;
4229 AssumptionsComplexity
++;
4232 AssumptionsUnprofitable
++;
4235 AssumptionsErrorBlock
++;
4238 AssumptionsInfiniteLoop
++;
4241 AssumptionsInvariantLoad
++;
4243 case DELINEARIZATION
:
4244 AssumptionsDelinearization
++;
4248 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4249 std::string Msg
= toString(Kind
) + Suffix
+ Set
.to_str();
4251 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
, BB
)
4254 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
,
4260 void Scop::addAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4261 AssumptionSign Sign
, BasicBlock
*BB
) {
4262 // Simplify the assumptions/restrictions first.
4263 Set
= Set
.gist_params(getContext());
4265 if (!trackAssumption(Kind
, Set
, Loc
, Sign
, BB
))
4268 if (Sign
== AS_ASSUMPTION
)
4269 AssumedContext
= AssumedContext
.intersect(Set
).coalesce();
4271 InvalidContext
= InvalidContext
.unite(Set
).coalesce();
4274 void Scop::recordAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4275 AssumptionSign Sign
, BasicBlock
*BB
) {
4276 assert((Set
.is_params() || BB
) &&
4277 "Assumptions without a basic block must be parameter sets");
4278 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4281 void Scop::addRecordedAssumptions() {
4282 while (!RecordedAssumptions
.empty()) {
4283 Assumption AS
= RecordedAssumptions
.pop_back_val();
4286 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
, nullptr /* BasicBlock */);
4290 // If the domain was deleted the assumptions are void.
4291 isl_set
*Dom
= getDomainConditions(AS
.BB
).release();
4295 // If a basic block was given use its domain to simplify the assumption.
4296 // In case of restrictions we know they only have to hold on the domain,
4297 // thus we can intersect them with the domain of the block. However, for
4298 // assumptions the domain has to imply them, thus:
4300 // Dom => S <==> A v B <==> A - B
4302 // To avoid the complement we will register A - B as a restriction not an
4304 isl_set
*S
= AS
.Set
.copy();
4305 if (AS
.Sign
== AS_RESTRICTION
)
4306 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4307 else /* (AS.Sign == AS_ASSUMPTION) */
4308 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4310 addAssumption(AS
.Kind
, isl::manage(S
), AS
.Loc
, AS_RESTRICTION
, AS
.BB
);
4314 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
, BasicBlock
*BB
) {
4315 LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind
<< "\n");
4316 addAssumption(Kind
, isl::set::empty(getParamSpace()), Loc
, AS_ASSUMPTION
, BB
);
4319 isl::set
Scop::getInvalidContext() const { return InvalidContext
; }
4321 void Scop::printContext(raw_ostream
&OS
) const {
4323 OS
.indent(4) << Context
<< "\n";
4325 OS
.indent(4) << "Assumed Context:\n";
4326 OS
.indent(4) << AssumedContext
<< "\n";
4328 OS
.indent(4) << "Invalid Context:\n";
4329 OS
.indent(4) << InvalidContext
<< "\n";
4332 for (const SCEV
*Parameter
: Parameters
)
4333 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4336 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4338 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4339 if (Pair
.second
.size() == 0)
4342 noOfGroups
+= Pair
.second
.size();
4345 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4346 if (MinMaxAliasGroups
.empty()) {
4347 OS
.indent(8) << "n/a\n";
4351 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4353 // If the group has no read only accesses print the write accesses.
4354 if (Pair
.second
.empty()) {
4355 OS
.indent(8) << "[[";
4356 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4357 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4363 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4364 OS
.indent(8) << "[[";
4365 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4366 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4367 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4375 void Scop::printStatements(raw_ostream
&OS
, bool PrintInstructions
) const {
4376 OS
<< "Statements {\n";
4378 for (const ScopStmt
&Stmt
: *this) {
4380 Stmt
.print(OS
, PrintInstructions
);
4383 OS
.indent(4) << "}\n";
4386 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4389 for (auto &Array
: arrays())
4392 OS
.indent(4) << "}\n";
4394 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4396 for (auto &Array
: arrays())
4397 Array
->print(OS
, /* SizeAsPwAff */ true);
4399 OS
.indent(4) << "}\n";
4402 void Scop::print(raw_ostream
&OS
, bool PrintInstructions
) const {
4403 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4404 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4405 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4406 OS
.indent(4) << "Invariant Accesses: {\n";
4407 for (const auto &IAClass
: InvariantEquivClasses
) {
4408 const auto &MAs
= IAClass
.InvariantAccesses
;
4410 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4412 MAs
.front()->print(OS
);
4413 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4417 OS
.indent(4) << "}\n";
4418 printContext(OS
.indent(4));
4419 printArrayInfo(OS
.indent(4));
4420 printAliasAssumptions(OS
);
4421 printStatements(OS
.indent(4), PrintInstructions
);
4424 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4425 LLVM_DUMP_METHOD
void Scop::dump() const { print(dbgs(), true); }
4428 isl::ctx
Scop::getIslCtx() const { return IslCtx
.get(); }
4430 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4432 // First try to use the SCEVAffinator to generate a piecewise defined
4433 // affine function from @p E in the context of @p BB. If that tasks becomes to
4434 // complex the affinator might return a nullptr. In such a case we invalidate
4435 // the SCoP and return a dummy value. This way we do not need to add error
4436 // handling code to all users of this function.
4437 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4439 // TODO: We could use a heuristic and either use:
4440 // SCEVAffinator::takeNonNegativeAssumption
4442 // SCEVAffinator::interpretAsUnsigned
4443 // to deal with unsigned or "NonNegative" SCEVs.
4445 Affinator
.takeNonNegativeAssumption(PWAC
);
4449 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4450 invalidate(COMPLEXITY
, DL
, BB
);
4451 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4454 isl::union_set
Scop::getDomains() const {
4455 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx().get(), 0);
4456 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4458 for (const ScopStmt
&Stmt
: *this)
4459 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4461 return isl::manage(Domain
);
4464 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4465 PWACtx PWAC
= getPwAff(E
, BB
);
4470 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4471 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4473 for (ScopStmt
&Stmt
: *this) {
4474 for (MemoryAccess
*MA
: Stmt
) {
4475 if (!Predicate(*MA
))
4478 isl::set Domain
= Stmt
.getDomain();
4479 isl::map AccessDomain
= MA
->getAccessRelation();
4480 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4481 Accesses
= Accesses
.add_map(AccessDomain
);
4485 return Accesses
.coalesce();
4488 isl::union_map
Scop::getMustWrites() {
4489 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4492 isl::union_map
Scop::getMayWrites() {
4493 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4496 isl::union_map
Scop::getWrites() {
4497 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4500 isl::union_map
Scop::getReads() {
4501 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4504 isl::union_map
Scop::getAccesses() {
4505 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4508 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4509 return getAccessesOfType(
4510 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4513 // Check whether @p Node is an extension node.
4515 // @return true if @p Node is an extension node.
4516 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4517 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4518 return isl_bool_error
;
4520 return isl_bool_true
;
4523 bool Scop::containsExtensionNode(isl::schedule Schedule
) {
4524 return isl_schedule_foreach_schedule_node_top_down(
4525 Schedule
.get(), isNotExtNode
, nullptr) == isl_stat_error
;
4528 isl::union_map
Scop::getSchedule() const {
4529 auto Tree
= getScheduleTree();
4530 if (containsExtensionNode(Tree
))
4533 return Tree
.get_map();
4536 isl::schedule
Scop::getScheduleTree() const {
4537 return Schedule
.intersect_domain(getDomains());
4540 void Scop::setSchedule(isl::union_map NewSchedule
) {
4541 auto S
= isl::schedule::from_domain(getDomains());
4542 Schedule
= S
.insert_partial_schedule(
4543 isl::multi_union_pw_aff::from_union_map(NewSchedule
));
4546 void Scop::setScheduleTree(isl::schedule NewSchedule
) {
4547 Schedule
= NewSchedule
;
4550 bool Scop::restrictDomains(isl::union_set Domain
) {
4551 bool Changed
= false;
4552 for (ScopStmt
&Stmt
: *this) {
4553 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4554 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4556 if (StmtDomain
.is_subset(NewStmtDomain
))
4561 NewStmtDomain
= NewStmtDomain
.coalesce();
4563 if (NewStmtDomain
.is_empty())
4564 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4566 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
4571 ScalarEvolution
*Scop::getSE() const { return SE
; }
4573 // Create an isl_multi_union_aff that defines an identity mapping from the
4574 // elements of USet to their N-th dimension.
4578 // Domain: { A[i,j]; B[i,j,k] }
4581 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4583 // @param USet A union set describing the elements for which to generate a
4585 // @param N The dimension to map to.
4586 // @returns A mapping from USet to its N-th dimension.
4587 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4590 assert(!USet
.is_empty());
4592 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4594 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4595 int Dim
= S
.dim(isl::dim::set
);
4596 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4599 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4601 Result
= Result
.add_pw_multi_aff(PMA
);
4602 return isl::stat::ok
;
4605 isl::stat Res
= USet
.foreach_set(Lambda
);
4608 assert(Res
== isl::stat::ok
);
4610 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4613 void Scop::addScopStmt(BasicBlock
*BB
, StringRef Name
, Loop
*SurroundingLoop
,
4614 std::vector
<Instruction
*> Instructions
) {
4615 assert(BB
&& "Unexpected nullptr!");
4616 Stmts
.emplace_back(*this, *BB
, Name
, SurroundingLoop
, Instructions
);
4617 auto *Stmt
= &Stmts
.back();
4618 StmtMap
[BB
].push_back(Stmt
);
4619 for (Instruction
*Inst
: Instructions
) {
4620 assert(!InstStmtMap
.count(Inst
) &&
4621 "Unexpected statement corresponding to the instruction.");
4622 InstStmtMap
[Inst
] = Stmt
;
4626 void Scop::addScopStmt(Region
*R
, StringRef Name
, Loop
*SurroundingLoop
,
4627 std::vector
<Instruction
*> Instructions
) {
4628 assert(R
&& "Unexpected nullptr!");
4629 Stmts
.emplace_back(*this, *R
, Name
, SurroundingLoop
, Instructions
);
4630 auto *Stmt
= &Stmts
.back();
4632 for (Instruction
*Inst
: Instructions
) {
4633 assert(!InstStmtMap
.count(Inst
) &&
4634 "Unexpected statement corresponding to the instruction.");
4635 InstStmtMap
[Inst
] = Stmt
;
4638 for (BasicBlock
*BB
: R
->blocks()) {
4639 StmtMap
[BB
].push_back(Stmt
);
4640 if (BB
== R
->getEntry())
4642 for (Instruction
&Inst
: *BB
) {
4643 assert(!InstStmtMap
.count(&Inst
) &&
4644 "Unexpected statement corresponding to the instruction.");
4645 InstStmtMap
[&Inst
] = Stmt
;
4650 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4653 isl::set SourceDomain
= SourceRel
.domain();
4654 isl::set TargetDomain
= TargetRel
.domain();
4655 assert(Domain
.is_subset(TargetDomain
) &&
4656 "Target access not defined for complete statement domain");
4657 assert(Domain
.is_subset(SourceDomain
) &&
4658 "Source access not defined for complete statement domain");
4660 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4662 return &(Stmts
.back());
4665 void Scop::buildSchedule(LoopInfo
&LI
) {
4666 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4667 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4668 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4669 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4670 Schedule
= LoopStack
[0].Schedule
;
4673 /// To generate a schedule for the elements in a Region we traverse the Region
4674 /// in reverse-post-order and add the contained RegionNodes in traversal order
4675 /// to the schedule of the loop that is currently at the top of the LoopStack.
4676 /// For loop-free codes, this results in a correct sequential ordering.
4687 /// Including loops requires additional processing. Whenever a loop header is
4688 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4689 /// from an empty schedule, we first process all RegionNodes that are within
4690 /// this loop and complete the sequential schedule at this loop-level before
4691 /// processing about any other nodes. To implement this
4692 /// loop-nodes-first-processing, the reverse post-order traversal is
4693 /// insufficient. Hence, we additionally check if the traversal yields
4694 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4695 /// These region-nodes are then queue and only traverse after the all nodes
4696 /// within the current loop have been processed.
4697 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4698 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4700 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4701 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4702 std::deque
<RegionNode
*> DelayList
;
4703 bool LastRNWaiting
= false;
4705 // Iterate over the region @p R in reverse post-order but queue
4706 // sub-regions/blocks iff they are not part of the last encountered but not
4707 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4708 // that we queued the last sub-region/block from the reverse post-order
4709 // iterator. If it is set we have to explore the next sub-region/block from
4710 // the iterator (if any) to guarantee progress. If it is not set we first try
4711 // the next queued sub-region/blocks.
4712 while (!WorkList
.empty() || !DelayList
.empty()) {
4715 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4716 RN
= WorkList
.front();
4717 WorkList
.pop_front();
4718 LastRNWaiting
= false;
4720 RN
= DelayList
.front();
4721 DelayList
.pop_front();
4724 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4728 Loop
*LastLoop
= LoopStack
.back().L
;
4729 if (LastLoop
!= L
) {
4730 if (LastLoop
&& !LastLoop
->contains(L
)) {
4731 LastRNWaiting
= true;
4732 DelayList
.push_back(RN
);
4735 LoopStack
.push_back({L
, nullptr, 0});
4737 buildSchedule(RN
, LoopStack
, LI
);
4741 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4742 if (RN
->isSubRegion()) {
4743 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
4744 if (!isNonAffineSubRegion(LocalRegion
)) {
4745 buildSchedule(LocalRegion
, LoopStack
, LI
);
4750 assert(LoopStack
.rbegin() != LoopStack
.rend());
4751 auto LoopData
= LoopStack
.rbegin();
4752 LoopData
->NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
4754 for (auto *Stmt
: getStmtListFor(RN
)) {
4755 isl::union_set UDomain
{Stmt
->getDomain()};
4756 auto StmtSchedule
= isl::schedule::from_domain(UDomain
);
4757 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, StmtSchedule
);
4760 // Check if we just processed the last node in this loop. If we did, finalize
4763 // - adding new schedule dimensions
4764 // - folding the resulting schedule into the parent loop schedule
4765 // - dropping the loop schedule from the LoopStack.
4767 // Then continue to check surrounding loops, which might also have been
4768 // completed by this node.
4769 size_t Dimension
= LoopStack
.size();
4770 while (LoopData
->L
&&
4771 LoopData
->NumBlocksProcessed
== getNumBlocksInLoop(LoopData
->L
)) {
4772 isl::schedule Schedule
= LoopData
->Schedule
;
4773 auto NumBlocksProcessed
= LoopData
->NumBlocksProcessed
;
4775 assert(std::next(LoopData
) != LoopStack
.rend());
4780 isl::union_set Domain
= Schedule
.get_domain();
4781 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, Dimension
);
4782 Schedule
= Schedule
.insert_partial_schedule(MUPA
);
4783 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, Schedule
);
4786 LoopData
->NumBlocksProcessed
+= NumBlocksProcessed
;
4788 // Now pop all loops processed up there from the LoopStack
4789 LoopStack
.erase(LoopStack
.begin() + Dimension
, LoopStack
.end());
4792 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
4793 auto StmtMapIt
= StmtMap
.find(BB
);
4794 if (StmtMapIt
== StmtMap
.end())
4796 return StmtMapIt
->second
;
4799 ScopStmt
*Scop::getIncomingStmtFor(const Use
&U
) const {
4800 auto *PHI
= cast
<PHINode
>(U
.getUser());
4801 BasicBlock
*IncomingBB
= PHI
->getIncomingBlock(U
);
4803 // If the value is a non-synthesizable from the incoming block, use the
4804 // statement that contains it as user statement.
4805 if (auto *IncomingInst
= dyn_cast
<Instruction
>(U
.get())) {
4806 if (IncomingInst
->getParent() == IncomingBB
) {
4807 if (ScopStmt
*IncomingStmt
= getStmtFor(IncomingInst
))
4808 return IncomingStmt
;
4812 // Otherwise, use the epilogue/last statement.
4813 return getLastStmtFor(IncomingBB
);
4816 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
4817 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
4818 if (!StmtList
.empty())
4819 return StmtList
.back();
4823 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
4824 if (RN
->isSubRegion())
4825 return getStmtListFor(RN
->getNodeAs
<Region
>());
4826 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
4829 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
4830 return getStmtListFor(R
->getEntry());
4833 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
4834 if (!L
|| !R
.contains(L
))
4836 // outermostLoopInRegion always returns nullptr for top level regions
4837 if (R
.isTopLevelRegion()) {
4838 // LoopInfo's depths start at 1, we start at 0
4839 return L
->getLoopDepth() - 1;
4841 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
4843 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
4847 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
4848 for (auto &SAI
: arrays()) {
4849 if (SAI
->getName() == BaseName
)
4855 void Scop::addAccessData(MemoryAccess
*Access
) {
4856 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
4857 assert(SAI
&& "can only use after access relations have been constructed");
4859 if (Access
->isOriginalValueKind() && Access
->isRead())
4860 ValueUseAccs
[SAI
].push_back(Access
);
4861 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
4862 PHIIncomingAccs
[SAI
].push_back(Access
);
4865 void Scop::removeAccessData(MemoryAccess
*Access
) {
4866 if (Access
->isOriginalValueKind() && Access
->isWrite()) {
4867 ValueDefAccs
.erase(Access
->getAccessValue());
4868 } else if (Access
->isOriginalValueKind() && Access
->isRead()) {
4869 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
4870 auto NewEnd
= std::remove(Uses
.begin(), Uses
.end(), Access
);
4871 Uses
.erase(NewEnd
, Uses
.end());
4872 } else if (Access
->isOriginalPHIKind() && Access
->isRead()) {
4873 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessInstruction());
4874 PHIReadAccs
.erase(PHI
);
4875 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
4876 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
4877 auto NewEnd
= std::remove(Incomings
.begin(), Incomings
.end(), Access
);
4878 Incomings
.erase(NewEnd
, Incomings
.end());
4882 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
4883 assert(SAI
->isValueKind());
4885 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
4889 return ValueDefAccs
.lookup(Val
);
4892 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
4893 assert(SAI
->isValueKind());
4894 auto It
= ValueUseAccs
.find(SAI
);
4895 if (It
== ValueUseAccs
.end())
4900 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
4901 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4903 if (SAI
->isExitPHIKind())
4906 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
4907 return PHIReadAccs
.lookup(PHI
);
4910 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
4911 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4912 auto It
= PHIIncomingAccs
.find(SAI
);
4913 if (It
== PHIIncomingAccs
.end())
4918 bool Scop::isEscaping(Instruction
*Inst
) {
4919 assert(contains(Inst
) && "The concept of escaping makes only sense for "
4920 "values defined inside the SCoP");
4922 for (Use
&Use
: Inst
->uses()) {
4923 BasicBlock
*UserBB
= getUseBlock(Use
);
4924 if (!contains(UserBB
))
4927 // When the SCoP region exit needs to be simplified, PHIs in the region exit
4928 // move to a new basic block such that its incoming blocks are not in the
4930 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
4931 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
4937 Scop::ScopStatistics
Scop::getStatistics() const {
4938 ScopStatistics Result
;
4939 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
4940 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
4942 int NumTotalLoops
= LoopStat
.NumLoops
;
4943 Result
.NumBoxedLoops
= getBoxedLoops().size();
4944 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
4946 for (const ScopStmt
&Stmt
: *this) {
4947 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
4948 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
4949 for (MemoryAccess
*MA
: Stmt
) {
4953 if (MA
->isLatestValueKind()) {
4954 Result
.NumValueWrites
+= 1;
4956 Result
.NumValueWritesInLoops
+= 1;
4959 if (MA
->isLatestAnyPHIKind()) {
4960 Result
.NumPHIWrites
+= 1;
4962 Result
.NumPHIWritesInLoops
+= 1;
4966 MA
->getAccessRelation().intersect_domain(Domain
).range();
4967 if (AccSet
.is_singleton()) {
4968 Result
.NumSingletonWrites
+= 1;
4970 Result
.NumSingletonWritesInLoops
+= 1;
4978 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
4979 scop
.print(OS
, PollyPrintInstructions
);
4983 //===----------------------------------------------------------------------===//
4984 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
4985 AU
.addRequired
<LoopInfoWrapperPass
>();
4986 AU
.addRequired
<RegionInfoPass
>();
4987 AU
.addRequired
<DominatorTreeWrapperPass
>();
4988 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
4989 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
4990 AU
.addRequired
<AAResultsWrapperPass
>();
4991 AU
.addRequired
<AssumptionCacheTracker
>();
4992 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
4993 AU
.setPreservesAll();
4996 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
4997 Scop::ScopStatistics ScopStats
) {
4998 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
5001 NumLoopsInScop
+= Stats
.NumLoops
;
5003 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
5005 if (Stats
.MaxDepth
== 0)
5006 NumScopsDepthZero
++;
5007 else if (Stats
.MaxDepth
== 1)
5009 else if (Stats
.MaxDepth
== 2)
5011 else if (Stats
.MaxDepth
== 3)
5012 NumScopsDepthThree
++;
5013 else if (Stats
.MaxDepth
== 4)
5014 NumScopsDepthFour
++;
5015 else if (Stats
.MaxDepth
== 5)
5016 NumScopsDepthFive
++;
5018 NumScopsDepthLarger
++;
5020 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
5021 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
5023 NumValueWrites
+= ScopStats
.NumValueWrites
;
5024 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
5025 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
5026 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
5027 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
5028 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
5031 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
5032 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5034 if (!SD
.isMaxRegionInScop(*R
))
5037 Function
*F
= R
->getEntry()->getParent();
5038 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5039 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5040 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5041 auto const &DL
= F
->getParent()->getDataLayout();
5042 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5043 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
5044 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5046 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5047 S
= SB
.getScop(); // take ownership of scop object
5049 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5051 ScopDetection::LoopStats Stats
=
5052 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5053 updateLoopCountStatistic(Stats
, S
->getStatistics());
5060 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5062 S
->print(OS
, PollyPrintInstructions
);
5064 OS
<< "Invalid Scop!\n";
5067 char ScopInfoRegionPass::ID
= 0;
5069 Pass
*polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5071 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5072 "Polly - Create polyhedral description of Scops", false,
5074 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5075 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5076 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5077 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5078 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5079 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5080 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5081 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5082 "Polly - Create polyhedral description of Scops", false,
5085 //===----------------------------------------------------------------------===//
5086 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5087 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5088 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5089 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5093 void ScopInfo::recompute() {
5094 RegionToScopMap
.clear();
5095 /// Create polyhedral description of scops for all the valid regions of a
5097 for (auto &It
: SD
) {
5098 Region
*R
= const_cast<Region
*>(It
);
5099 if (!SD
.isMaxRegionInScop(*R
))
5102 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5103 std::unique_ptr
<Scop
> S
= SB
.getScop();
5106 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5107 ScopDetection::LoopStats Stats
=
5108 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5109 updateLoopCountStatistic(Stats
, S
->getStatistics());
5111 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5112 assert(Inserted
&& "Building Scop for the same region twice!");
5117 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5118 FunctionAnalysisManager::Invalidator
&Inv
) {
5119 // Check whether the analysis, all analyses on functions have been preserved
5120 // or anything we're holding references to is being invalidated
5121 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5122 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5123 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5124 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5125 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5126 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5127 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5128 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
5131 AnalysisKey
ScopInfoAnalysis::Key
;
5133 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5134 FunctionAnalysisManager
&FAM
) {
5135 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5136 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5137 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5138 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5139 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5140 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5141 auto &DL
= F
.getParent()->getDataLayout();
5142 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5143 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5146 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5147 FunctionAnalysisManager
&FAM
) {
5148 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5149 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5150 // order here to keep the output persistent
5151 for (auto &It
: reverse(SI
)) {
5153 It
.second
->print(Stream
, PollyPrintInstructions
);
5155 Stream
<< "Invalid Scop!\n";
5157 return PreservedAnalyses::all();
5160 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5161 AU
.addRequired
<LoopInfoWrapperPass
>();
5162 AU
.addRequired
<RegionInfoPass
>();
5163 AU
.addRequired
<DominatorTreeWrapperPass
>();
5164 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5165 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5166 AU
.addRequired
<AAResultsWrapperPass
>();
5167 AU
.addRequired
<AssumptionCacheTracker
>();
5168 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5169 AU
.setPreservesAll();
5172 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5173 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5174 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5175 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5176 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5177 auto const &DL
= F
.getParent()->getDataLayout();
5178 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5179 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5180 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5182 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5186 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5187 for (auto &It
: *Result
) {
5189 It
.second
->print(OS
, PollyPrintInstructions
);
5191 OS
<< "Invalid Scop!\n";
5195 char ScopInfoWrapperPass::ID
= 0;
5197 Pass
*polly::createScopInfoWrapperPassPass() {
5198 return new ScopInfoWrapperPass();
5201 INITIALIZE_PASS_BEGIN(
5202 ScopInfoWrapperPass
, "polly-function-scops",
5203 "Polly - Create polyhedral description of all Scops of a function", false,
5205 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5206 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5207 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5208 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5209 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5210 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5211 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5212 INITIALIZE_PASS_END(
5213 ScopInfoWrapperPass
, "polly-function-scops",
5214 "Polly - Create polyhedral description of all Scops of a function", false,