1 //===--------- ScopInfo.cpp ----------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the Scop
13 // detection derived from their LLVM-IR code.
15 // This representation is shared among several tools in the polyhedral
16 // community, which are e.g. Cloog, Pluto, Loopo, Graphite.
18 //===----------------------------------------------------------------------===//
20 #include "polly/ScopInfo.h"
21 #include "polly/LinkAllPasses.h"
22 #include "polly/Options.h"
23 #include "polly/ScopBuilder.h"
24 #include "polly/Support/GICHelper.h"
25 #include "polly/Support/SCEVValidator.h"
26 #include "polly/Support/ScopHelper.h"
27 #include "llvm/ADT/DepthFirstIterator.h"
28 #include "llvm/ADT/MapVector.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SetVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Analysis/AliasAnalysis.h"
35 #include "llvm/Analysis/AssumptionCache.h"
36 #include "llvm/Analysis/Loads.h"
37 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Analysis/LoopIterator.h"
39 #include "llvm/Analysis/RegionIterator.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/Support/Debug.h"
44 #include "isl/constraint.h"
45 #include "isl/local_space.h"
47 #include "isl/options.h"
48 #include "isl/printer.h"
49 #include "isl/schedule.h"
50 #include "isl/schedule_node.h"
52 #include "isl/union_map.h"
53 #include "isl/union_set.h"
60 using namespace polly
;
62 #define DEBUG_TYPE "polly-scops"
64 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
65 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
66 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
67 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
68 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
69 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
70 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
71 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
72 STATISTIC(AssumptionsInvariantLoad
,
73 "Number of invariant loads assumptions taken.");
74 STATISTIC(AssumptionsDelinearization
,
75 "Number of delinearization assumptions taken.");
77 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
78 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
79 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
80 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
81 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
82 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
83 STATISTIC(NumScopsDepthLarger
,
84 "Number of scops with maximal loop depth 6 and larger");
85 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static const int MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static const int MaxDisjunctsInContext = 4;
98 OptComputeOut("polly-analysis-computeout",
99 cl::desc("Bound the scop analysis by a maximal amount of "
100 "computational steps (0 means no bound)"),
101 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
102 cl::cat(PollyCategory
));
104 static cl::opt
<bool> PollyRemarksMinimal(
105 "polly-remarks-minimal",
106 cl::desc("Do not emit remarks about assumptions that are known"),
107 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
109 // Multiplicative reductions can be disabled separately as these kind of
110 // operations can overflow easily. Additive reductions and bit operations
111 // are in contrast pretty stable.
112 static cl::opt
<bool> DisableMultiplicativeReductions(
113 "polly-disable-multiplicative-reductions",
114 cl::desc("Disable multiplicative reductions"), cl::Hidden
, cl::ZeroOrMore
,
115 cl::init(false), cl::cat(PollyCategory
));
117 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
118 "polly-rtc-max-array-disjuncts",
119 cl::desc("The maximal number of disjunts allowed in memory accesses to "
121 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
123 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
124 "polly-rtc-max-parameters",
125 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
126 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
128 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
129 "polly-rtc-max-arrays-per-group",
130 cl::desc("The maximal number of arrays to compare in each alias group."),
131 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
133 static cl::opt
<std::string
> UserContextStr(
134 "polly-context", cl::value_desc("isl parameter set"),
135 cl::desc("Provide additional constraints on the context parameters"),
136 cl::init(""), cl::cat(PollyCategory
));
138 static cl::opt
<bool> DetectReductions("polly-detect-reductions",
139 cl::desc("Detect and exploit reductions"),
140 cl::Hidden
, cl::ZeroOrMore
,
141 cl::init(true), cl::cat(PollyCategory
));
144 IslOnErrorAbort("polly-on-isl-error-abort",
145 cl::desc("Abort if an isl error is encountered"),
146 cl::init(true), cl::cat(PollyCategory
));
148 static cl::opt
<bool> PollyPreciseInbounds(
149 "polly-precise-inbounds",
150 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
151 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
154 PollyIgnoreInbounds("polly-ignore-inbounds",
155 cl::desc("Do not take inbounds assumptions at all"),
156 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
158 static cl::opt
<bool> PollyIgnoreParamBounds(
159 "polly-ignore-parameter-bounds",
161 "Do not add parameter bounds and do no gist simplify sets accordingly"),
162 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
164 static cl::opt
<bool> PollyPreciseFoldAccesses(
165 "polly-precise-fold-accesses",
166 cl::desc("Fold memory accesses to model more possible delinearizations "
167 "(does not scale well)"),
168 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
170 bool polly::UseInstructionNames
;
171 static cl::opt
<bool, true> XUseInstructionNames(
172 "polly-use-llvm-names",
173 cl::desc("Use LLVM-IR names when deriving statement names"),
174 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
175 cl::ZeroOrMore
, cl::cat(PollyCategory
));
177 static cl::opt
<bool> PollyPrintInstructions(
178 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
179 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
181 //===----------------------------------------------------------------------===//
183 // Create a sequence of two schedules. Either argument may be null and is
184 // interpreted as the empty schedule. Can also return null if both schedules are
186 static __isl_give isl_schedule
*
187 combineInSequence(__isl_take isl_schedule
*Prev
,
188 __isl_take isl_schedule
*Succ
) {
194 return isl_schedule_sequence(Prev
, Succ
);
197 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
198 int dim
, isl::dim type
) {
200 isl::ctx Ctx
= S
.get_ctx();
202 // The upper and lower bound for a parameter value is derived either from
203 // the data type of the parameter or from the - possibly more restrictive -
205 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
206 S
= S
.lower_bound_val(type
, dim
, V
);
207 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
208 S
= S
.upper_bound_val(type
, dim
, V
);
210 if (Range
.isFullSet())
213 if (isl_set_n_basic_set(S
.get()) > MaxDisjunctsInContext
)
216 // In case of signed wrapping, we can refine the set of valid values by
217 // excluding the part not covered by the wrapping range.
218 if (Range
.isSignWrappedSet()) {
219 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
220 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
222 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
224 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
231 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
232 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
236 if (!S
->contains(BasePtrLI
))
239 ScalarEvolution
&SE
= *S
->getSE();
241 auto *OriginBaseSCEV
=
242 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
246 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
247 if (!OriginBaseSCEVUnknown
)
250 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
254 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl_ctx
*Ctx
,
255 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
256 const DataLayout
&DL
, Scop
*S
,
257 const char *BaseName
)
258 : BasePtr(BasePtr
), ElementType(ElementType
), IsOnHeap(false), Kind(Kind
),
259 DL(DL
), S(*S
), FAD(nullptr) {
260 std::string BasePtrName
=
262 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
263 Kind
== MemoryKind::PHI
? "__phi" : "",
264 UseInstructionNames
);
265 Id
= isl_id_alloc(Ctx
, BasePtrName
.c_str(), this);
269 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
270 BasePtrOriginSAI
= nullptr;
274 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
275 if (BasePtrOriginSAI
)
276 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
279 __isl_give isl_space
*ScopArrayInfo::getSpace() const {
281 isl_space_set_alloc(isl_id_get_ctx(Id
), 0, getNumberOfDimensions());
282 Space
= isl_space_set_tuple_id(Space
, isl_dim_set
, isl_id_copy(Id
));
286 bool ScopArrayInfo::isReadOnly() {
287 isl::union_set WriteSet
= give(S
.getWrites()).range();
288 isl::space Space
= give(getSpace());
289 WriteSet
= WriteSet
.extract_set(Space
);
291 return bool(WriteSet
.is_empty());
294 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
295 if (Array
->getElementType() != getElementType())
298 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
301 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
302 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
308 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
309 if (NewElementType
== ElementType
)
312 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
313 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
315 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
318 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
319 ElementType
= NewElementType
;
321 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
322 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
326 /// Make the ScopArrayInfo model a Fortran Array
327 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
328 assert(FAD
&& "got invalid Fortran array descriptor");
330 assert(this->FAD
== FAD
&&
331 "receiving different array descriptors for same array");
335 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
339 isl::space
Space(S
.getIslCtx(), 1, 0);
341 std::string param_name
= getName();
342 param_name
+= "_fortranarr_size";
343 // TODO: see if we need to add `this` as the id user pointer
344 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
.c_str(), nullptr);
346 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
348 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
350 DimensionSizesPw
[0] = PwAff
.release();
353 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
354 bool CheckConsistency
) {
355 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
356 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
357 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
359 if (CheckConsistency
) {
360 for (int i
= 0; i
< SharedDims
; i
++) {
361 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
362 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
363 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
367 if (DimensionSizes
.size() >= NewSizes
.size())
371 DimensionSizes
.clear();
372 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
374 for (isl_pw_aff
*Size
: DimensionSizesPw
)
375 isl_pw_aff_free(Size
);
376 DimensionSizesPw
.clear();
377 for (const SCEV
*Expr
: DimensionSizes
) {
379 DimensionSizesPw
.push_back(nullptr);
382 isl_pw_aff
*Size
= S
.getPwAffOnly(Expr
);
383 DimensionSizesPw
.push_back(Size
);
388 ScopArrayInfo::~ScopArrayInfo() {
390 for (isl_pw_aff
*Size
: DimensionSizesPw
)
391 isl_pw_aff_free(Size
);
394 std::string
ScopArrayInfo::getName() const { return isl_id_get_name(Id
); }
396 int ScopArrayInfo::getElemSizeInBytes() const {
397 return DL
.getTypeAllocSize(ElementType
);
400 __isl_give isl_id
*ScopArrayInfo::getBasePtrId() const {
401 return isl_id_copy(Id
);
404 void ScopArrayInfo::dump() const { print(errs()); }
406 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
407 OS
.indent(8) << *getElementType() << " " << getName();
409 // If this is a Fortran array, then we can print the outermost dimension
410 // as a isl_pw_aff even though there is no SCEV information.
411 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
413 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
414 !getDimensionSize(0)) {
418 for (; u
< getNumberOfDimensions(); u
++) {
422 auto *Size
= getDimensionSizePw(u
);
423 OS
<< " " << Size
<< " ";
424 isl_pw_aff_free(Size
);
426 OS
<< *getDimensionSize(u
);
434 if (BasePtrOriginSAI
)
435 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
437 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
440 const ScopArrayInfo
*
441 ScopArrayInfo::getFromAccessFunction(__isl_keep isl_pw_multi_aff
*PMA
) {
442 isl_id
*Id
= isl_pw_multi_aff_get_tuple_id(PMA
, isl_dim_out
);
443 assert(Id
&& "Output dimension didn't have an ID");
444 return getFromId(Id
);
447 const ScopArrayInfo
*ScopArrayInfo::getFromId(__isl_take isl_id
*Id
) {
448 void *User
= isl_id_get_user(Id
);
449 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
454 void MemoryAccess::wrapConstantDimensions() {
455 auto *SAI
= getScopArrayInfo();
456 isl::space ArraySpace
= give(SAI
->getSpace());
457 isl::ctx Ctx
= ArraySpace
.get_ctx();
458 unsigned DimsArray
= SAI
->getNumberOfDimensions();
460 isl::multi_aff DivModAff
= isl::multi_aff::identity(
461 ArraySpace
.map_from_domain_and_range(ArraySpace
));
462 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
464 // Begin with last dimension, to iteratively carry into higher dimensions.
465 for (int i
= DimsArray
- 1; i
> 0; i
--) {
466 auto *DimSize
= SAI
->getDimensionSize(i
);
467 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
469 // This transformation is not applicable to dimensions with dynamic size.
473 // This transformation is not applicable to dimensions of size zero.
474 if (DimSize
->isZero())
477 isl::val DimSizeVal
=
478 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
479 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
481 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
483 // Compute: index % size
484 // Modulo must apply in the divide of the previous iteration, if any.
485 isl::aff Modulo
= Var
.mod_val(DimSizeVal
);
486 Modulo
= Modulo
.pullback(DivModAff
);
488 // Compute: floor(index / size)
489 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
490 Divide
= Divide
.floor();
491 Divide
= Divide
.add(PrevVar
);
492 Divide
= Divide
.pullback(DivModAff
);
494 // Apply Modulo and Divide.
495 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
496 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
499 // Apply all modulo/divides on the accesses.
500 isl::map Relation
= give(AccessRelation
);
501 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
502 Relation
= Relation
.detect_equalities();
503 AccessRelation
= Relation
.release();
506 void MemoryAccess::updateDimensionality() {
507 auto *SAI
= getScopArrayInfo();
508 isl::space ArraySpace
= give(SAI
->getSpace());
509 isl::space AccessSpace
= give(isl_map_get_space(AccessRelation
)).range();
510 isl::ctx Ctx
= ArraySpace
.get_ctx();
512 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
513 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
514 auto DimsMissing
= DimsArray
- DimsAccess
;
516 auto *BB
= getStatement()->getEntryBlock();
517 auto &DL
= BB
->getModule()->getDataLayout();
518 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
519 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
521 isl::map Map
= isl::map::from_domain_and_range(
522 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
524 for (unsigned i
= 0; i
< DimsMissing
; i
++)
525 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
527 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
528 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
530 AccessRelation
= isl_map_apply_range(AccessRelation
, Map
.release());
532 // For the non delinearized arrays, divide the access function of the last
533 // subscript by the size of the elements in the array.
535 // A stride one array access in C expressed as A[i] is expressed in
536 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
537 // two subsequent values of 'i' index two values that are stored next to
538 // each other in memory. By this division we make this characteristic
539 // obvious again. If the base pointer was accessed with offsets not divisible
540 // by the accesses element size, we will have chosen a smaller ArrayElemSize
541 // that divides the offsets of all accesses to this base pointer.
542 if (DimsAccess
== 1) {
543 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
544 AccessRelation
= isl_map_floordiv_val(AccessRelation
, V
.release());
547 // We currently do this only if we added at least one dimension, which means
548 // some dimension's indices have not been specified, an indicator that some
549 // index values have been added together.
550 // TODO: Investigate general usefulness; Effect on unit tests is to make index
551 // expressions more complicated.
553 wrapConstantDimensions();
556 computeBoundsOnAccessRelation(ArrayElemSize
);
558 // Introduce multi-element accesses in case the type loaded by this memory
559 // access is larger than the canonical element type of the array.
561 // An access ((float *)A)[i] to an array char *A is modeled as
562 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
563 if (ElemBytes
> ArrayElemSize
) {
564 assert(ElemBytes
% ArrayElemSize
== 0 &&
565 "Loaded element size should be multiple of canonical element size");
566 isl::map Map
= isl::map::from_domain_and_range(
567 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
568 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
569 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
574 LS
= isl::local_space(Map
.get_space());
575 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
577 C
= isl::constraint::alloc_inequality(LS
);
578 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
579 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
580 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
581 Map
= Map
.add_constraint(C
);
583 C
= isl::constraint::alloc_inequality(LS
);
584 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
585 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
586 C
= C
.set_constant_val(isl::val(Ctx
, 0));
587 Map
= Map
.add_constraint(C
);
588 AccessRelation
= isl_map_apply_range(AccessRelation
, Map
.release());
593 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
595 case MemoryAccess::RT_NONE
:
596 llvm_unreachable("Requested a reduction operator string for a memory "
597 "access which isn't a reduction");
598 case MemoryAccess::RT_ADD
:
600 case MemoryAccess::RT_MUL
:
602 case MemoryAccess::RT_BOR
:
604 case MemoryAccess::RT_BXOR
:
606 case MemoryAccess::RT_BAND
:
609 llvm_unreachable("Unknown reduction type");
613 /// Return the reduction type for a given binary operator.
614 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
615 const Instruction
*Load
) {
617 return MemoryAccess::RT_NONE
;
618 switch (BinOp
->getOpcode()) {
619 case Instruction::FAdd
:
620 if (!BinOp
->hasUnsafeAlgebra())
621 return MemoryAccess::RT_NONE
;
623 case Instruction::Add
:
624 return MemoryAccess::RT_ADD
;
625 case Instruction::Or
:
626 return MemoryAccess::RT_BOR
;
627 case Instruction::Xor
:
628 return MemoryAccess::RT_BXOR
;
629 case Instruction::And
:
630 return MemoryAccess::RT_BAND
;
631 case Instruction::FMul
:
632 if (!BinOp
->hasUnsafeAlgebra())
633 return MemoryAccess::RT_NONE
;
635 case Instruction::Mul
:
636 if (DisableMultiplicativeReductions
)
637 return MemoryAccess::RT_NONE
;
638 return MemoryAccess::RT_MUL
;
640 return MemoryAccess::RT_NONE
;
644 MemoryAccess::~MemoryAccess() {
646 isl_set_free(InvalidDomain
);
647 isl_map_free(AccessRelation
);
648 isl_map_free(NewAccessRelation
);
651 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
652 isl_id
*ArrayId
= getArrayId();
653 void *User
= isl_id_get_user(ArrayId
);
654 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
655 isl_id_free(ArrayId
);
659 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
660 isl_id
*ArrayId
= getLatestArrayId();
661 void *User
= isl_id_get_user(ArrayId
);
662 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
663 isl_id_free(ArrayId
);
667 __isl_give isl_id
*MemoryAccess::getOriginalArrayId() const {
668 return isl_map_get_tuple_id(AccessRelation
, isl_dim_out
);
671 __isl_give isl_id
*MemoryAccess::getLatestArrayId() const {
672 if (!hasNewAccessRelation())
673 return getOriginalArrayId();
674 return isl_map_get_tuple_id(NewAccessRelation
, isl_dim_out
);
677 __isl_give isl_map
*MemoryAccess::getAddressFunction() const {
678 return isl_map_lexmin(getAccessRelation());
681 __isl_give isl_pw_multi_aff
*MemoryAccess::applyScheduleToAccessRelation(
682 __isl_take isl_union_map
*USchedule
) const {
683 isl_map
*Schedule
, *ScheduledAccRel
;
684 isl_union_set
*UDomain
;
686 UDomain
= isl_union_set_from_set(getStatement()->getDomain());
687 USchedule
= isl_union_map_intersect_domain(USchedule
, UDomain
);
688 Schedule
= isl_map_from_union_map(USchedule
);
689 ScheduledAccRel
= isl_map_apply_domain(getAddressFunction(), Schedule
);
690 return isl_pw_multi_aff_from_map(ScheduledAccRel
);
693 __isl_give isl_map
*MemoryAccess::getOriginalAccessRelation() const {
694 return isl_map_copy(AccessRelation
);
697 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
698 return stringFromIslObj(AccessRelation
);
701 __isl_give isl_space
*MemoryAccess::getOriginalAccessRelationSpace() const {
702 return isl_map_get_space(AccessRelation
);
705 __isl_give isl_map
*MemoryAccess::getNewAccessRelation() const {
706 return isl_map_copy(NewAccessRelation
);
709 std::string
MemoryAccess::getNewAccessRelationStr() const {
710 return stringFromIslObj(NewAccessRelation
);
713 __isl_give isl_basic_map
*
714 MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
715 isl_space
*Space
= isl_space_set_alloc(Statement
->getIslCtx(), 0, 1);
716 Space
= isl_space_align_params(Space
, Statement
->getDomainSpace());
718 return isl_basic_map_from_domain_and_range(
719 isl_basic_set_universe(Statement
->getDomainSpace()),
720 isl_basic_set_universe(Space
));
723 // Formalize no out-of-bound access assumption
725 // When delinearizing array accesses we optimistically assume that the
726 // delinearized accesses do not access out of bound locations (the subscript
727 // expression of each array evaluates for each statement instance that is
728 // executed to a value that is larger than zero and strictly smaller than the
729 // size of the corresponding dimension). The only exception is the outermost
730 // dimension for which we do not need to assume any upper bound. At this point
731 // we formalize this assumption to ensure that at code generation time the
732 // relevant run-time checks can be generated.
734 // To find the set of constraints necessary to avoid out of bound accesses, we
735 // first build the set of data locations that are not within array bounds. We
736 // then apply the reverse access relation to obtain the set of iterations that
737 // may contain invalid accesses and reduce this set of iterations to the ones
738 // that are actually executed by intersecting them with the domain of the
739 // statement. If we now project out all loop dimensions, we obtain a set of
740 // parameters that may cause statement instances to be executed that may
741 // possibly yield out of bound memory accesses. The complement of these
742 // constraints is the set of constraints that needs to be assumed to ensure such
743 // statement instances are never executed.
744 void MemoryAccess::assumeNoOutOfBound() {
745 if (PollyIgnoreInbounds
)
747 auto *SAI
= getScopArrayInfo();
748 isl::space Space
= give(getOriginalAccessRelationSpace()).range();
749 isl::set Outside
= isl::set::empty(Space
);
750 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
751 isl::local_space
LS(Space
);
752 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
753 isl::pw_aff Zero
= isl::pw_aff(LS
);
755 isl::set DimOutside
= Var
.lt_set(Zero
);
756 isl::pw_aff SizeE
= give(SAI
->getDimensionSizePw(i
));
757 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
758 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
759 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
761 Outside
= Outside
.unite(DimOutside
);
764 Outside
= Outside
.apply(give(getAccessRelation()).reverse());
765 Outside
= Outside
.intersect(give(Statement
->getDomain()));
766 Outside
= Outside
.params();
768 // Remove divs to avoid the construction of overly complicated assumptions.
769 // Doing so increases the set of parameter combinations that are assumed to
770 // not appear. This is always save, but may make the resulting run-time check
771 // bail out more often than strictly necessary.
772 Outside
= Outside
.remove_divs();
773 Outside
= Outside
.complement();
774 const auto &Loc
= getAccessInstruction()
775 ? getAccessInstruction()->getDebugLoc()
777 if (!PollyPreciseInbounds
)
778 Outside
= Outside
.gist_params(give(Statement
->getDomain()).params());
779 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
.release(), Loc
,
783 void MemoryAccess::buildMemIntrinsicAccessRelation() {
784 assert(isMemoryIntrinsic());
785 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
787 isl::pw_aff SubscriptPWA
= give(getPwAff(Subscripts
[0]));
788 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
791 if (Subscripts
[1] == nullptr) {
792 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
794 isl::pw_aff LengthPWA
= give(getPwAff(Subscripts
[1]));
795 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
796 isl::space RangeSpace
= LengthMap
.get_space().range();
797 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
799 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
800 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
801 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
802 LengthMap
= LengthMap
.sum(SubscriptMap
);
804 LengthMap
.set_tuple_id(isl::dim::in
, give(getStatement()->getDomainId()))
808 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
809 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
811 auto MAI
= MemAccInst(getAccessInstruction());
812 if (isa
<MemIntrinsic
>(MAI
))
815 Value
*Ptr
= MAI
.getPointerOperand();
816 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
819 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
820 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
823 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
824 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
825 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
827 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
828 if (Range
.isFullSet())
831 if (Range
.isWrappedSet() || Range
.isSignWrappedSet())
834 bool isWrapping
= Range
.isSignWrappedSet();
836 unsigned BW
= Range
.getBitWidth();
837 const auto One
= APInt(BW
, 1);
838 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
839 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
841 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
842 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
844 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
846 isl::map Relation
= give(AccessRelation
);
847 isl::set AccessRange
= Relation
.range();
848 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
850 AccessRelation
= Relation
.intersect_range(AccessRange
).release();
853 void MemoryAccess::foldAccessRelation() {
854 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
857 int Size
= Subscripts
.size();
859 isl::map NewAccessRelation
= give(isl_map_copy(AccessRelation
));
861 for (int i
= Size
- 2; i
>= 0; --i
) {
863 isl::map MapOne
, MapTwo
;
864 isl::pw_aff DimSize
= give(getPwAff(Sizes
[i
+ 1]));
866 isl::space SpaceSize
= DimSize
.get_space();
868 give(isl_space_get_dim_id(SpaceSize
.get(), isl_dim_param
, 0));
870 Space
= give(isl_map_copy(AccessRelation
)).get_space();
871 Space
= Space
.range().map_from_set();
872 Space
= Space
.align_params(SpaceSize
);
874 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
876 MapOne
= isl::map::universe(Space
);
877 for (int j
= 0; j
< Size
; ++j
)
878 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
879 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
881 MapTwo
= isl::map::universe(Space
);
882 for (int j
= 0; j
< Size
; ++j
)
883 if (j
< i
|| j
> i
+ 1)
884 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
886 isl::local_space
LS(Space
);
888 C
= isl::constraint::alloc_equality(LS
);
889 C
= C
.set_constant_si(-1);
890 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
891 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
892 MapTwo
= MapTwo
.add_constraint(C
);
893 C
= isl::constraint::alloc_equality(LS
);
894 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
895 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
896 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
897 MapTwo
= MapTwo
.add_constraint(C
);
898 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
900 MapOne
= MapOne
.unite(MapTwo
);
901 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
904 isl::id BaseAddrId
= give(getScopArrayInfo()->getBasePtrId());
905 isl::space Space
= give(Statement
->getDomainSpace());
906 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
907 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
908 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
910 NewAccessRelation
.gist_domain(give(Statement
->getDomain()));
912 // Access dimension folding might in certain cases increase the number of
913 // disjuncts in the memory access, which can possibly complicate the generated
914 // run-time checks and can lead to costly compilation.
915 if (!PollyPreciseFoldAccesses
&&
916 isl_map_n_basic_map(NewAccessRelation
.get()) >
917 isl_map_n_basic_map(AccessRelation
)) {
919 isl_map_free(AccessRelation
);
920 AccessRelation
= NewAccessRelation
.release();
924 /// Check if @p Expr is divisible by @p Size.
925 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
930 // Only one factor needs to be divisible.
931 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
932 for (auto *FactorExpr
: MulExpr
->operands())
933 if (isDivisible(FactorExpr
, Size
, SE
))
938 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
940 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
941 for (auto *OpExpr
: NAryExpr
->operands())
942 if (!isDivisible(OpExpr
, Size
, SE
))
947 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
948 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
949 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
950 return MulSCEV
== Expr
;
953 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
954 assert(!AccessRelation
&& "AccessReltation already built");
956 // Initialize the invalid domain which describes all iterations for which the
957 // access relation is not modeled correctly.
958 auto *StmtInvalidDomain
= getStatement()->getInvalidDomain();
959 InvalidDomain
= isl_set_empty(isl_set_get_space(StmtInvalidDomain
));
960 isl_set_free(StmtInvalidDomain
);
962 isl_ctx
*Ctx
= isl_id_get_ctx(Id
);
963 isl_id
*BaseAddrId
= SAI
->getBasePtrId();
965 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
966 buildMemIntrinsicAccessRelation();
968 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
973 // We overapproximate non-affine accesses with a possible access to the
974 // whole array. For read accesses it does not make a difference, if an
975 // access must or may happen. However, for write accesses it is important to
976 // differentiate between writes that must happen and writes that may happen.
978 AccessRelation
= isl_map_from_basic_map(createBasicAccessMap(Statement
));
981 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
985 isl_space
*Space
= isl_space_alloc(Ctx
, 0, Statement
->getNumIterators(), 0);
986 AccessRelation
= isl_map_universe(Space
);
988 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
989 isl_pw_aff
*Affine
= getPwAff(Subscripts
[i
]);
990 isl_map
*SubscriptMap
= isl_map_from_pw_aff(Affine
);
991 AccessRelation
= isl_map_flat_range_product(AccessRelation
, SubscriptMap
);
994 Space
= Statement
->getDomainSpace();
995 AccessRelation
= isl_map_set_tuple_id(
996 AccessRelation
, isl_dim_in
, isl_space_get_tuple_id(Space
, isl_dim_set
));
998 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
1000 AccessRelation
= isl_map_gist_domain(AccessRelation
, Statement
->getDomain());
1001 isl_space_free(Space
);
1004 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1005 AccessType AccType
, Value
*BaseAddress
,
1006 Type
*ElementType
, bool Affine
,
1007 ArrayRef
<const SCEV
*> Subscripts
,
1008 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1010 : Kind(Kind
), AccType(AccType
), RedType(RT_NONE
), Statement(Stmt
),
1011 InvalidDomain(nullptr), BaseAddr(BaseAddress
), ElementType(ElementType
),
1012 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1013 AccessValue(AccessValue
), IsAffine(Affine
),
1014 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1015 NewAccessRelation(nullptr), FAD(nullptr) {
1016 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1017 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1019 std::string IdName
= Stmt
->getBaseName() + Access
;
1020 Id
= isl_id_alloc(Stmt
->getParent()->getIslCtx(), IdName
.c_str(), this);
1023 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
,
1024 __isl_take isl_map
*AccRel
)
1025 : Kind(MemoryKind::Array
), AccType(AccType
), RedType(RT_NONE
),
1026 Statement(Stmt
), InvalidDomain(nullptr), AccessInstruction(nullptr),
1027 IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel
),
1029 auto *ArrayInfoId
= isl_map_get_tuple_id(NewAccessRelation
, isl_dim_out
);
1030 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1031 Sizes
.push_back(nullptr);
1032 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1033 Sizes
.push_back(SAI
->getDimensionSize(i
));
1034 ElementType
= SAI
->getElementType();
1035 BaseAddr
= SAI
->getBasePtr();
1036 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1037 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1039 std::string IdName
= Stmt
->getBaseName() + Access
;
1040 Id
= isl_id_alloc(Stmt
->getParent()->getIslCtx(), IdName
.c_str(), this);
1043 void MemoryAccess::realignParams() {
1044 auto *Ctx
= Statement
->getParent()->getContext();
1045 InvalidDomain
= isl_set_gist_params(InvalidDomain
, isl_set_copy(Ctx
));
1046 AccessRelation
= isl_map_gist_params(AccessRelation
, Ctx
);
1049 const std::string
MemoryAccess::getReductionOperatorStr() const {
1050 return MemoryAccess::getReductionOperatorStr(getReductionType());
1053 __isl_give isl_id
*MemoryAccess::getId() const { return isl_id_copy(Id
); }
1055 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1056 MemoryAccess::ReductionType RT
) {
1057 if (RT
== MemoryAccess::RT_NONE
)
1060 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1064 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1066 void MemoryAccess::print(raw_ostream
&OS
) const {
1069 OS
.indent(12) << "ReadAccess :=\t";
1072 OS
.indent(12) << "MustWriteAccess :=\t";
1075 OS
.indent(12) << "MayWriteAccess :=\t";
1079 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1082 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1086 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1087 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1088 if (hasNewAccessRelation())
1089 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1092 void MemoryAccess::dump() const { print(errs()); }
1094 __isl_give isl_pw_aff
*MemoryAccess::getPwAff(const SCEV
*E
) {
1095 auto *Stmt
= getStatement();
1096 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1097 isl_set
*StmtDom
= isl_set_reset_tuple_id(getStatement()->getDomain());
1098 isl_set
*NewInvalidDom
= isl_set_intersect(StmtDom
, PWAC
.second
);
1099 InvalidDomain
= isl_set_union(InvalidDomain
, NewInvalidDom
);
1103 // Create a map in the size of the provided set domain, that maps from the
1104 // one element of the provided set domain to another element of the provided
1106 // The mapping is limited to all points that are equal in all but the last
1107 // dimension and for which the last dimension of the input is strict smaller
1108 // than the last dimension of the output.
1110 // getEqualAndLarger(set[i0, i1, ..., iX]):
1112 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1113 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1115 static isl_map
*getEqualAndLarger(__isl_take isl_space
*setDomain
) {
1116 isl_space
*Space
= isl_space_map_from_set(setDomain
);
1117 isl_map
*Map
= isl_map_universe(Space
);
1118 unsigned lastDimension
= isl_map_dim(Map
, isl_dim_in
) - 1;
1120 // Set all but the last dimension to be equal for the input and output
1122 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1123 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1124 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1125 Map
= isl_map_equate(Map
, isl_dim_in
, i
, isl_dim_out
, i
);
1127 // Set the last dimension of the input to be strict smaller than the
1128 // last dimension of the output.
1130 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1131 Map
= isl_map_order_lt(Map
, isl_dim_in
, lastDimension
, isl_dim_out
,
1136 __isl_give isl_set
*
1137 MemoryAccess::getStride(__isl_take
const isl_map
*Schedule
) const {
1138 isl_map
*S
= const_cast<isl_map
*>(Schedule
);
1139 isl_map
*AccessRelation
= getAccessRelation();
1140 isl_space
*Space
= isl_space_range(isl_map_get_space(S
));
1141 isl_map
*NextScatt
= getEqualAndLarger(Space
);
1143 S
= isl_map_reverse(S
);
1144 NextScatt
= isl_map_lexmin(NextScatt
);
1146 NextScatt
= isl_map_apply_range(NextScatt
, isl_map_copy(S
));
1147 NextScatt
= isl_map_apply_range(NextScatt
, isl_map_copy(AccessRelation
));
1148 NextScatt
= isl_map_apply_domain(NextScatt
, S
);
1149 NextScatt
= isl_map_apply_domain(NextScatt
, AccessRelation
);
1151 isl_set
*Deltas
= isl_map_deltas(NextScatt
);
1155 bool MemoryAccess::isStrideX(__isl_take
const isl_map
*Schedule
,
1156 int StrideWidth
) const {
1157 isl_set
*Stride
, *StrideX
;
1160 Stride
= getStride(Schedule
);
1161 StrideX
= isl_set_universe(isl_set_get_space(Stride
));
1162 for (unsigned i
= 0; i
< isl_set_dim(StrideX
, isl_dim_set
) - 1; i
++)
1163 StrideX
= isl_set_fix_si(StrideX
, isl_dim_set
, i
, 0);
1164 StrideX
= isl_set_fix_si(StrideX
, isl_dim_set
,
1165 isl_set_dim(StrideX
, isl_dim_set
) - 1, StrideWidth
);
1166 IsStrideX
= isl_set_is_subset(Stride
, StrideX
);
1168 isl_set_free(StrideX
);
1169 isl_set_free(Stride
);
1174 bool MemoryAccess::isStrideZero(__isl_take
const isl_map
*Schedule
) const {
1175 return isStrideX(Schedule
, 0);
1178 bool MemoryAccess::isStrideOne(__isl_take
const isl_map
*Schedule
) const {
1179 return isStrideX(Schedule
, 1);
1182 void MemoryAccess::setAccessRelation(__isl_take isl_map
*NewAccess
) {
1183 isl_map_free(AccessRelation
);
1184 AccessRelation
= NewAccess
;
1187 void MemoryAccess::setNewAccessRelation(__isl_take isl_map
*NewAccess
) {
1191 // Check domain space compatibility.
1192 auto *NewSpace
= isl_map_get_space(NewAccess
);
1193 auto *NewDomainSpace
= isl_space_domain(isl_space_copy(NewSpace
));
1194 auto *OriginalDomainSpace
= getStatement()->getDomainSpace();
1195 assert(isl_space_has_equal_tuples(OriginalDomainSpace
, NewDomainSpace
));
1196 isl_space_free(NewDomainSpace
);
1197 isl_space_free(OriginalDomainSpace
);
1199 // Reads must be executed unconditionally. Writes might be executed in a
1202 // Check whether there is an access for every statement instance.
1203 auto *StmtDomain
= getStatement()->getDomain();
1204 StmtDomain
= isl_set_intersect_params(
1205 StmtDomain
, getStatement()->getParent()->getContext());
1206 auto *NewDomain
= isl_map_domain(isl_map_copy(NewAccess
));
1207 assert(isl_set_is_subset(StmtDomain
, NewDomain
) &&
1208 "Partial READ accesses not supported");
1209 isl_set_free(NewDomain
);
1210 isl_set_free(StmtDomain
);
1213 auto *NewAccessSpace
= isl_space_range(NewSpace
);
1214 assert(isl_space_has_tuple_id(NewAccessSpace
, isl_dim_set
) &&
1215 "Must specify the array that is accessed");
1216 auto *NewArrayId
= isl_space_get_tuple_id(NewAccessSpace
, isl_dim_set
);
1217 auto *SAI
= static_cast<ScopArrayInfo
*>(isl_id_get_user(NewArrayId
));
1218 assert(SAI
&& "Must set a ScopArrayInfo");
1220 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1221 InvariantEquivClassTy
*EqClass
=
1222 getStatement()->getParent()->lookupInvariantEquivClass(
1225 "Access functions to indirect arrays must have an invariant and "
1226 "hoisted base pointer");
1229 // Check whether access dimensions correspond to number of dimensions of the
1231 auto Dims
= SAI
->getNumberOfDimensions();
1232 assert(isl_space_dim(NewAccessSpace
, isl_dim_set
) == Dims
&&
1233 "Access dims must match array dims");
1234 isl_space_free(NewAccessSpace
);
1235 isl_id_free(NewArrayId
);
1238 isl_map_free(NewAccessRelation
);
1239 NewAccessRelation
= NewAccess
;
1242 bool MemoryAccess::isLatestPartialAccess() const {
1243 isl::set StmtDom
= give(getStatement()->getDomain());
1244 isl::set AccDom
= give(isl_map_domain(getLatestAccessRelation()));
1246 return isl_set_is_subset(StmtDom
.keep(), AccDom
.keep()) == isl_bool_false
;
1249 //===----------------------------------------------------------------------===//
1251 __isl_give isl_map
*ScopStmt::getSchedule() const {
1252 isl_set
*Domain
= getDomain();
1253 if (isl_set_is_empty(Domain
)) {
1254 isl_set_free(Domain
);
1255 return isl_map_from_aff(
1256 isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1258 auto *Schedule
= getParent()->getSchedule();
1260 isl_set_free(Domain
);
1263 Schedule
= isl_union_map_intersect_domain(
1264 Schedule
, isl_union_set_from_set(isl_set_copy(Domain
)));
1265 if (isl_union_map_is_empty(Schedule
)) {
1266 isl_set_free(Domain
);
1267 isl_union_map_free(Schedule
);
1268 return isl_map_from_aff(
1269 isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1271 auto *M
= isl_map_from_union_map(Schedule
);
1272 M
= isl_map_coalesce(M
);
1273 M
= isl_map_gist_domain(M
, Domain
);
1274 M
= isl_map_coalesce(M
);
1278 __isl_give isl_pw_aff
*ScopStmt::getPwAff(const SCEV
*E
, bool NonNegative
) {
1279 PWACtx PWAC
= getParent()->getPwAff(E
, getEntryBlock(), NonNegative
);
1280 InvalidDomain
= isl_set_union(InvalidDomain
, PWAC
.second
);
1284 void ScopStmt::restrictDomain(__isl_take isl_set
*NewDomain
) {
1285 assert(isl_set_is_subset(NewDomain
, Domain
) &&
1286 "New domain is not a subset of old domain!");
1287 isl_set_free(Domain
);
1291 void ScopStmt::buildAccessRelations() {
1292 Scop
&S
= *getParent();
1293 for (MemoryAccess
*Access
: MemAccs
) {
1294 Type
*ElementType
= Access
->getElementType();
1297 if (Access
->isPHIKind())
1298 Ty
= MemoryKind::PHI
;
1299 else if (Access
->isExitPHIKind())
1300 Ty
= MemoryKind::ExitPHI
;
1301 else if (Access
->isValueKind())
1302 Ty
= MemoryKind::Value
;
1304 Ty
= MemoryKind::Array
;
1306 auto *SAI
= S
.getOrCreateScopArrayInfo(Access
->getOriginalBaseAddr(),
1307 ElementType
, Access
->Sizes
, Ty
);
1308 Access
->buildAccessRelation(SAI
);
1312 MemoryAccess
*ScopStmt::lookupPHIReadOf(PHINode
*PHI
) const {
1313 for (auto *MA
: *this) {
1316 if (!MA
->isLatestAnyPHIKind())
1319 if (MA
->getAccessInstruction() == PHI
)
1325 void ScopStmt::addAccess(MemoryAccess
*Access
) {
1326 Instruction
*AccessInst
= Access
->getAccessInstruction();
1328 if (Access
->isArrayKind()) {
1329 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1330 MAL
.emplace_front(Access
);
1331 } else if (Access
->isValueKind() && Access
->isWrite()) {
1332 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1333 assert(Parent
.getStmtFor(AccessVal
) == this);
1334 assert(!ValueWrites
.lookup(AccessVal
));
1336 ValueWrites
[AccessVal
] = Access
;
1337 } else if (Access
->isValueKind() && Access
->isRead()) {
1338 Value
*AccessVal
= Access
->getAccessValue();
1339 assert(!ValueReads
.lookup(AccessVal
));
1341 ValueReads
[AccessVal
] = Access
;
1342 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1343 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1344 assert(!PHIWrites
.lookup(PHI
));
1346 PHIWrites
[PHI
] = Access
;
1349 MemAccs
.push_back(Access
);
1352 void ScopStmt::realignParams() {
1353 for (MemoryAccess
*MA
: *this)
1354 MA
->realignParams();
1356 auto *Ctx
= Parent
.getContext();
1357 InvalidDomain
= isl_set_gist_params(InvalidDomain
, isl_set_copy(Ctx
));
1358 Domain
= isl_set_gist_params(Domain
, Ctx
);
1361 /// Add @p BSet to the set @p User if @p BSet is bounded.
1362 static isl_stat
collectBoundedParts(__isl_take isl_basic_set
*BSet
,
1364 isl_set
**BoundedParts
= static_cast<isl_set
**>(User
);
1365 if (isl_basic_set_is_bounded(BSet
))
1366 *BoundedParts
= isl_set_union(*BoundedParts
, isl_set_from_basic_set(BSet
));
1368 isl_basic_set_free(BSet
);
1372 /// Return the bounded parts of @p S.
1373 static __isl_give isl_set
*collectBoundedParts(__isl_take isl_set
*S
) {
1374 isl_set
*BoundedParts
= isl_set_empty(isl_set_get_space(S
));
1375 isl_set_foreach_basic_set(S
, collectBoundedParts
, &BoundedParts
);
1377 return BoundedParts
;
1380 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1382 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1383 /// both with regards to the dimension @p Dim.
1384 static std::pair
<__isl_give isl_set
*, __isl_give isl_set
*>
1385 partitionSetParts(__isl_take isl_set
*S
, unsigned Dim
) {
1387 for (unsigned u
= 0, e
= isl_set_n_dim(S
); u
< e
; u
++)
1388 S
= isl_set_lower_bound_si(S
, isl_dim_set
, u
, 0);
1390 unsigned NumDimsS
= isl_set_n_dim(S
);
1391 isl_set
*OnlyDimS
= isl_set_copy(S
);
1393 // Remove dimensions that are greater than Dim as they are not interesting.
1394 assert(NumDimsS
>= Dim
+ 1);
1396 isl_set_project_out(OnlyDimS
, isl_dim_set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1398 // Create artificial parametric upper bounds for dimensions smaller than Dim
1399 // as we are not interested in them.
1400 OnlyDimS
= isl_set_insert_dims(OnlyDimS
, isl_dim_param
, 0, Dim
);
1401 for (unsigned u
= 0; u
< Dim
; u
++) {
1402 isl_constraint
*C
= isl_inequality_alloc(
1403 isl_local_space_from_space(isl_set_get_space(OnlyDimS
)));
1404 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, u
, 1);
1405 C
= isl_constraint_set_coefficient_si(C
, isl_dim_set
, u
, -1);
1406 OnlyDimS
= isl_set_add_constraint(OnlyDimS
, C
);
1409 // Collect all bounded parts of OnlyDimS.
1410 isl_set
*BoundedParts
= collectBoundedParts(OnlyDimS
);
1412 // Create the dimensions greater than Dim again.
1413 BoundedParts
= isl_set_insert_dims(BoundedParts
, isl_dim_set
, Dim
+ 1,
1414 NumDimsS
- Dim
- 1);
1416 // Remove the artificial upper bound parameters again.
1417 BoundedParts
= isl_set_remove_dims(BoundedParts
, isl_dim_param
, 0, Dim
);
1419 isl_set
*UnboundedParts
= isl_set_subtract(S
, isl_set_copy(BoundedParts
));
1420 return std::make_pair(UnboundedParts
, BoundedParts
);
1423 /// Set the dimension Ids from @p From in @p To.
1424 static __isl_give isl_set
*setDimensionIds(__isl_keep isl_set
*From
,
1425 __isl_take isl_set
*To
) {
1426 for (unsigned u
= 0, e
= isl_set_n_dim(From
); u
< e
; u
++) {
1427 isl_id
*DimId
= isl_set_get_dim_id(From
, isl_dim_set
, u
);
1428 To
= isl_set_set_dim_id(To
, isl_dim_set
, u
, DimId
);
1433 /// Create the conditions under which @p L @p Pred @p R is true.
1434 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1435 __isl_take isl_pw_aff
*L
,
1436 __isl_take isl_pw_aff
*R
) {
1438 case ICmpInst::ICMP_EQ
:
1439 return isl_pw_aff_eq_set(L
, R
);
1440 case ICmpInst::ICMP_NE
:
1441 return isl_pw_aff_ne_set(L
, R
);
1442 case ICmpInst::ICMP_SLT
:
1443 return isl_pw_aff_lt_set(L
, R
);
1444 case ICmpInst::ICMP_SLE
:
1445 return isl_pw_aff_le_set(L
, R
);
1446 case ICmpInst::ICMP_SGT
:
1447 return isl_pw_aff_gt_set(L
, R
);
1448 case ICmpInst::ICMP_SGE
:
1449 return isl_pw_aff_ge_set(L
, R
);
1450 case ICmpInst::ICMP_ULT
:
1451 return isl_pw_aff_lt_set(L
, R
);
1452 case ICmpInst::ICMP_UGT
:
1453 return isl_pw_aff_gt_set(L
, R
);
1454 case ICmpInst::ICMP_ULE
:
1455 return isl_pw_aff_le_set(L
, R
);
1456 case ICmpInst::ICMP_UGE
:
1457 return isl_pw_aff_ge_set(L
, R
);
1459 llvm_unreachable("Non integer predicate not supported");
1463 /// Create the conditions under which @p L @p Pred @p R is true.
1465 /// Helper function that will make sure the dimensions of the result have the
1466 /// same isl_id's as the @p Domain.
1467 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1468 __isl_take isl_pw_aff
*L
,
1469 __isl_take isl_pw_aff
*R
,
1470 __isl_keep isl_set
*Domain
) {
1471 isl_set
*ConsequenceCondSet
= buildConditionSet(Pred
, L
, R
);
1472 return setDimensionIds(Domain
, ConsequenceCondSet
);
1475 /// Build the conditions sets for the switch @p SI in the @p Domain.
1477 /// This will fill @p ConditionSets with the conditions under which control
1478 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1479 /// have as many elements as @p SI has successors.
1481 buildConditionSets(ScopStmt
&Stmt
, SwitchInst
*SI
, Loop
*L
,
1482 __isl_keep isl_set
*Domain
,
1483 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1485 Value
*Condition
= getConditionFromTerminator(SI
);
1486 assert(Condition
&& "No condition for switch");
1488 Scop
&S
= *Stmt
.getParent();
1489 ScalarEvolution
&SE
= *S
.getSE();
1490 isl_pw_aff
*LHS
, *RHS
;
1491 LHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(Condition
, L
));
1493 unsigned NumSuccessors
= SI
->getNumSuccessors();
1494 ConditionSets
.resize(NumSuccessors
);
1495 for (auto &Case
: SI
->cases()) {
1496 unsigned Idx
= Case
.getSuccessorIndex();
1497 ConstantInt
*CaseValue
= Case
.getCaseValue();
1499 RHS
= Stmt
.getPwAff(SE
.getSCEV(CaseValue
));
1500 isl_set
*CaseConditionSet
=
1501 buildConditionSet(ICmpInst::ICMP_EQ
, isl_pw_aff_copy(LHS
), RHS
, Domain
);
1502 ConditionSets
[Idx
] = isl_set_coalesce(
1503 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1506 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1507 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1508 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1510 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1511 ConditionSets
[0] = setDimensionIds(
1512 Domain
, isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
));
1514 isl_pw_aff_free(LHS
);
1519 /// Build the conditions sets for the branch condition @p Condition in
1522 /// This will fill @p ConditionSets with the conditions under which control
1523 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1524 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1525 /// context under which @p Condition is true/false will be returned as the
1526 /// new elements of @p ConditionSets.
1528 buildConditionSets(ScopStmt
&Stmt
, Value
*Condition
, TerminatorInst
*TI
,
1529 Loop
*L
, __isl_keep isl_set
*Domain
,
1530 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1532 Scop
&S
= *Stmt
.getParent();
1533 isl_set
*ConsequenceCondSet
= nullptr;
1534 if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1535 if (CCond
->isZero())
1536 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1538 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1539 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1540 auto Opcode
= BinOp
->getOpcode();
1541 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1543 bool Valid
= buildConditionSets(Stmt
, BinOp
->getOperand(0), TI
, L
, Domain
,
1545 buildConditionSets(Stmt
, BinOp
->getOperand(1), TI
, L
, Domain
,
1548 while (!ConditionSets
.empty())
1549 isl_set_free(ConditionSets
.pop_back_val());
1553 isl_set_free(ConditionSets
.pop_back_val());
1554 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1555 isl_set_free(ConditionSets
.pop_back_val());
1556 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1558 if (Opcode
== Instruction::And
)
1559 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1561 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1563 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1565 "Condition of exiting branch was neither constant nor ICmp!");
1567 ScalarEvolution
&SE
= *S
.getSE();
1568 isl_pw_aff
*LHS
, *RHS
;
1569 // For unsigned comparisons we assumed the signed bit of neither operand
1570 // to be set. The comparison is equal to a signed comparison under this
1572 bool NonNeg
= ICond
->isUnsigned();
1573 LHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(ICond
->getOperand(0), L
), NonNeg
);
1574 RHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(ICond
->getOperand(1), L
), NonNeg
);
1575 ConsequenceCondSet
=
1576 buildConditionSet(ICond
->getPredicate(), LHS
, RHS
, Domain
);
1579 // If no terminator was given we are only looking for parameter constraints
1580 // under which @p Condition is true/false.
1582 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1583 assert(ConsequenceCondSet
);
1584 ConsequenceCondSet
= isl_set_coalesce(
1585 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1587 isl_set
*AlternativeCondSet
= nullptr;
1589 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1592 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1593 isl_set_copy(ConsequenceCondSet
));
1595 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1599 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc());
1600 isl_set_free(AlternativeCondSet
);
1601 isl_set_free(ConsequenceCondSet
);
1605 ConditionSets
.push_back(ConsequenceCondSet
);
1606 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1611 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1613 /// This will fill @p ConditionSets with the conditions under which control
1614 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1615 /// have as many elements as @p TI has successors.
1617 buildConditionSets(ScopStmt
&Stmt
, TerminatorInst
*TI
, Loop
*L
,
1618 __isl_keep isl_set
*Domain
,
1619 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1621 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1622 return buildConditionSets(Stmt
, SI
, L
, Domain
, ConditionSets
);
1624 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1626 if (TI
->getNumSuccessors() == 1) {
1627 ConditionSets
.push_back(isl_set_copy(Domain
));
1631 Value
*Condition
= getConditionFromTerminator(TI
);
1632 assert(Condition
&& "No condition for Terminator");
1634 return buildConditionSets(Stmt
, Condition
, TI
, L
, Domain
, ConditionSets
);
1637 void ScopStmt::buildDomain() {
1638 isl_id
*Id
= isl_id_alloc(getIslCtx(), getBaseName(), this);
1640 Domain
= getParent()->getDomainConditions(this);
1641 Domain
= isl_set_set_tuple_id(Domain
, Id
);
1644 void ScopStmt::collectSurroundingLoops() {
1645 for (unsigned u
= 0, e
= isl_set_n_dim(Domain
); u
< e
; u
++) {
1646 isl_id
*DimId
= isl_set_get_dim_id(Domain
, isl_dim_set
, u
);
1647 NestLoops
.push_back(static_cast<Loop
*>(isl_id_get_user(DimId
)));
1652 ScopStmt::ScopStmt(Scop
&parent
, Region
&R
, Loop
*SurroundingLoop
)
1653 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(nullptr),
1654 R(&R
), Build(nullptr), SurroundingLoop(SurroundingLoop
) {
1656 BaseName
= getIslCompatibleName(
1657 "Stmt", R
.getNameStr(), parent
.getNextStmtIdx(), "", UseInstructionNames
);
1660 ScopStmt::ScopStmt(Scop
&parent
, BasicBlock
&bb
, Loop
*SurroundingLoop
,
1661 std::vector
<Instruction
*> Instructions
)
1662 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(&bb
),
1663 R(nullptr), Build(nullptr), SurroundingLoop(SurroundingLoop
),
1664 Instructions(Instructions
) {
1666 BaseName
= getIslCompatibleName("Stmt", &bb
, parent
.getNextStmtIdx(), "",
1667 UseInstructionNames
);
1670 ScopStmt::ScopStmt(Scop
&parent
, __isl_take isl_map
*SourceRel
,
1671 __isl_take isl_map
*TargetRel
, __isl_take isl_set
*NewDomain
)
1672 : Parent(parent
), InvalidDomain(nullptr), Domain(NewDomain
), BB(nullptr),
1673 R(nullptr), Build(nullptr) {
1674 BaseName
= getIslCompatibleName("CopyStmt_", "",
1675 std::to_string(parent
.getCopyStmtsNum()));
1676 auto *Id
= isl_id_alloc(getIslCtx(), getBaseName(), this);
1677 Domain
= isl_set_set_tuple_id(Domain
, isl_id_copy(Id
));
1678 TargetRel
= isl_map_set_tuple_id(TargetRel
, isl_dim_in
, Id
);
1680 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE
, TargetRel
);
1681 parent
.addAccessFunction(Access
);
1683 SourceRel
= isl_map_set_tuple_id(SourceRel
, isl_dim_in
, isl_id_copy(Id
));
1684 Access
= new MemoryAccess(this, MemoryAccess::AccessType::READ
, SourceRel
);
1685 parent
.addAccessFunction(Access
);
1689 void ScopStmt::init(LoopInfo
&LI
) {
1690 assert(!Domain
&& "init must be called only once");
1693 collectSurroundingLoops();
1694 buildAccessRelations();
1696 if (DetectReductions
)
1697 checkForReductions();
1700 /// Collect loads which might form a reduction chain with @p StoreMA.
1702 /// Check if the stored value for @p StoreMA is a binary operator with one or
1703 /// two loads as operands. If the binary operand is commutative & associative,
1704 /// used only once (by @p StoreMA) and its load operands are also used only
1705 /// once, we have found a possible reduction chain. It starts at an operand
1706 /// load and includes the binary operator and @p StoreMA.
1708 /// Note: We allow only one use to ensure the load and binary operator cannot
1709 /// escape this block or into any other store except @p StoreMA.
1710 void ScopStmt::collectCandiateReductionLoads(
1711 MemoryAccess
*StoreMA
, SmallVectorImpl
<MemoryAccess
*> &Loads
) {
1712 auto *Store
= dyn_cast
<StoreInst
>(StoreMA
->getAccessInstruction());
1716 // Skip if there is not one binary operator between the load and the store
1717 auto *BinOp
= dyn_cast
<BinaryOperator
>(Store
->getValueOperand());
1721 // Skip if the binary operators has multiple uses
1722 if (BinOp
->getNumUses() != 1)
1725 // Skip if the opcode of the binary operator is not commutative/associative
1726 if (!BinOp
->isCommutative() || !BinOp
->isAssociative())
1729 // Skip if the binary operator is outside the current SCoP
1730 if (BinOp
->getParent() != Store
->getParent())
1733 // Skip if it is a multiplicative reduction and we disabled them
1734 if (DisableMultiplicativeReductions
&&
1735 (BinOp
->getOpcode() == Instruction::Mul
||
1736 BinOp
->getOpcode() == Instruction::FMul
))
1739 // Check the binary operator operands for a candidate load
1740 auto *PossibleLoad0
= dyn_cast
<LoadInst
>(BinOp
->getOperand(0));
1741 auto *PossibleLoad1
= dyn_cast
<LoadInst
>(BinOp
->getOperand(1));
1742 if (!PossibleLoad0
&& !PossibleLoad1
)
1745 // A load is only a candidate if it cannot escape (thus has only this use)
1746 if (PossibleLoad0
&& PossibleLoad0
->getNumUses() == 1)
1747 if (PossibleLoad0
->getParent() == Store
->getParent())
1748 Loads
.push_back(&getArrayAccessFor(PossibleLoad0
));
1749 if (PossibleLoad1
&& PossibleLoad1
->getNumUses() == 1)
1750 if (PossibleLoad1
->getParent() == Store
->getParent())
1751 Loads
.push_back(&getArrayAccessFor(PossibleLoad1
));
// NOTE(review): ScopStmt::checkForReductions() detects load/store pairs that
// form binary reduction chains and marks both accesses as reduction-like.
// This block is a damaged extraction: the embedded integers are original
// source line numbers and jumps in the numbering (e.g. 1771->1775,
// 1809->1815, 1815->1819) indicate interior lines were dropped -- presumably
// the 'continue' statements, the 'Valid' flag declaration/initialization,
// the 'AccRel' declaration, and the final 'if (!Valid) continue;' guard.
// Code tokens below are preserved unchanged; restore the missing lines from
// version control before editing.
1754 /// Check for reductions in this ScopStmt.
1756 /// Iterate over all store memory accesses and check for valid binary reduction
1757 /// like chains. For all candidates we check if they have the same base address
1758 /// and there are no other accesses which overlap with them. The base address
1759 /// check rules out impossible reductions candidates early. The overlap check,
1760 /// together with the "only one user" check in collectCandiateReductionLoads,
1761 /// guarantees that none of the intermediate results will escape during
1762 /// execution of the loop nest. We basically check here that no other memory
1763 /// access can access the same memory as the potential reduction.
1764 void ScopStmt::checkForReductions() {
1765 SmallVector
<MemoryAccess
*, 2> Loads
;
1766 SmallVector
<std::pair
<MemoryAccess
*, MemoryAccess
*>, 4> Candidates
;
1768 // First collect candidate load-store reduction chains by iterating over all
1769 // stores and collecting possible reduction loads.
1770 for (MemoryAccess
*StoreMA
: MemAccs
) {
1771 if (StoreMA
->isRead())
1775 collectCandiateReductionLoads(StoreMA
, Loads
);
1776 for (MemoryAccess
*LoadMA
: Loads
)
1777 Candidates
.push_back(std::make_pair(LoadMA
, StoreMA
));
1780 // Then check each possible candidate pair.
1781 for (const auto &CandidatePair
: Candidates
) {
1783 isl_map
*LoadAccs
= CandidatePair
.first
->getAccessRelation();
1784 isl_map
*StoreAccs
= CandidatePair
.second
->getAccessRelation();
1786 // Skip those with obviously unequal base addresses.
1787 if (!isl_map_has_equal_space(LoadAccs
, StoreAccs
)) {
1788 isl_map_free(LoadAccs
);
1789 isl_map_free(StoreAccs
);
1793 // And check if the remaining for overlap with other memory accesses.
1794 isl_map
*AllAccsRel
= isl_map_union(LoadAccs
, StoreAccs
);
1795 AllAccsRel
= isl_map_intersect_domain(AllAccsRel
, getDomain());
1796 isl_set
*AllAccs
= isl_map_range(AllAccsRel
);
1798 for (MemoryAccess
*MA
: MemAccs
) {
1799 if (MA
== CandidatePair
.first
|| MA
== CandidatePair
.second
)
1803 isl_map_intersect_domain(MA
->getAccessRelation(), getDomain());
1804 isl_set
*Accs
= isl_map_range(AccRel
);
1806 if (isl_set_has_equal_space(AllAccs
, Accs
)) {
1807 isl_set
*OverlapAccs
= isl_set_intersect(Accs
, isl_set_copy(AllAccs
));
1808 Valid
= Valid
&& isl_set_is_empty(OverlapAccs
);
1809 isl_set_free(OverlapAccs
);
1815 isl_set_free(AllAccs
);
1819 const LoadInst
*Load
=
1820 dyn_cast
<const LoadInst
>(CandidatePair
.first
->getAccessInstruction());
1821 MemoryAccess::ReductionType RT
=
1822 getReductionType(dyn_cast
<BinaryOperator
>(Load
->user_back()), Load
);
1824 // If no overlapping access was found we mark the load and store as
1826 CandidatePair
.first
->markAsReductionLike(RT
);
1827 CandidatePair
.second
->markAsReductionLike(RT
);
1831 std::string
ScopStmt::getDomainStr() const { return stringFromIslObj(Domain
); }
// NOTE(review): Three small ScopStmt members follow -- getScheduleStr()
// (stringify the schedule), setInvalidDomain() (replace the owned invalid
// domain set), and getEntryBlock() (entry block for block or region
// statements). Jumps in the embedded numbering (1834->1837, 1843->1847,
// 1847->1849) show guard/return/assignment lines were dropped by the
// extraction; tokens are preserved unchanged.
1833 std::string
ScopStmt::getScheduleStr() const {
1834 auto *S
= getSchedule();
1837 auto Str
= stringFromIslObj(S
);
1842 void ScopStmt::setInvalidDomain(__isl_take isl_set
*ID
) {
1843 isl_set_free(InvalidDomain
);
1847 BasicBlock
*ScopStmt::getEntryBlock() const {
1849 return getBasicBlock();
1850 return getRegion()->getEntry();
1853 unsigned ScopStmt::getNumIterators() const { return NestLoops
.size(); }
1855 const char *ScopStmt::getBaseName() const { return BaseName
.c_str(); }
1857 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1858 return NestLoops
[Dimension
];
1861 isl_ctx
*ScopStmt::getIslCtx() const { return Parent
.getIslCtx(); }
1863 __isl_give isl_set
*ScopStmt::getDomain() const { return isl_set_copy(Domain
); }
1865 __isl_give isl_space
*ScopStmt::getDomainSpace() const {
1866 return isl_set_get_space(Domain
);
1869 __isl_give isl_id
*ScopStmt::getDomainId() const {
1870 return isl_set_get_tuple_id(Domain
);
1873 ScopStmt::~ScopStmt() {
1874 isl_set_free(Domain
);
1875 isl_set_free(InvalidDomain
);
1878 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1879 OS
<< "Instructions {\n";
1881 for (Instruction
*Inst
: Instructions
)
1882 OS
.indent(16) << *Inst
<< "\n";
1884 OS
.indent(16) << "}\n";
// NOTE(review): ScopStmt::print() emits the statement name, its domain and
// schedule strings (or "n/a"), its memory accesses, and optionally its
// instruction list. The extraction dropped lines (numbering jumps
// 1889->1892, 1894->1896, 1903->1906) -- presumably the 'if (Domain)'
// guards, the matching 'else'/braces, and the per-access print call.
// Tokens are preserved unchanged.
1887 void ScopStmt::print(raw_ostream
&OS
) const {
1888 OS
<< "\t" << getBaseName() << "\n";
1889 OS
.indent(12) << "Domain :=\n";
1892 OS
.indent(16) << getDomainStr() << ";\n";
1894 OS
.indent(16) << "n/a\n";
1896 OS
.indent(12) << "Schedule :=\n";
1899 OS
.indent(16) << getScheduleStr() << ";\n";
1901 OS
.indent(16) << "n/a\n";
1903 for (MemoryAccess
*Access
: MemAccs
)
1906 if (PollyPrintInstructions
)
1907 printInstructions(OS
.indent(12));
1910 void ScopStmt::dump() const { print(dbgs()); }
// NOTE(review): Two ScopStmt members -- removeAccessData() erases the
// bookkeeping entries (ValueReads/ValueWrites/PHIWrites) associated with a
// MemoryAccess, and removeMemoryAccess() drops an access plus all scalar
// accesses caused by the same instruction. The extraction dropped lines
// (numbering jumps 1914->1916, 1938->1940, 1940->1942, 1944->1946) --
// presumably '(void)Found;' casts, closing braces, the lambda terminator
// '};', and the predicate guard inside the loop. Tokens are preserved
// unchanged; recover the dropped lines before editing.
1912 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
1913 if (MA
->isRead() && MA
->isOriginalValueKind()) {
1914 bool Found
= ValueReads
.erase(MA
->getAccessValue());
1916 assert(Found
&& "Expected access data not found");
1918 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
1919 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
1921 assert(Found
&& "Expected access data not found");
1923 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
1924 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1926 assert(Found
&& "Expected access data not found");
1930 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
1931 // Remove the memory accesses from this statement together with all scalar
1932 // accesses that were caused by it. MemoryKind::Value READs have no access
1933 // instruction, hence would not be removed by this function. However, it is
1934 // only used for invariant LoadInst accesses, its arguments are always affine,
1935 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1936 // accesses to be removed.
1937 auto Predicate
= [&](MemoryAccess
*Acc
) {
1938 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
1940 for (auto *MA
: MemAccs
) {
1942 removeAccessData(MA
);
1944 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
1946 InstructionToAccess
.erase(MA
->getAccessInstruction());
1949 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
) {
1950 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
1951 assert(MAIt
!= MemAccs
.end());
1952 MemAccs
.erase(MAIt
);
1954 removeAccessData(MA
);
1956 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
1957 if (It
!= InstructionToAccess
.end()) {
1958 It
->second
.remove(MA
);
1959 if (It
->second
.empty())
1960 InstructionToAccess
.erase(MA
->getAccessInstruction());
1964 //===----------------------------------------------------------------------===//
1965 /// Scop class implement
1967 void Scop::setContext(__isl_take isl_set
*NewContext
) {
1968 NewContext
= isl_set_align_params(NewContext
, isl_set_get_space(Context
));
1969 isl_set_free(Context
);
1970 Context
= NewContext
;
// NOTE(review): Three related pieces -- SCEVSensitiveParameterRewriter
// (rewrites SCEVUnknowns through a value map while keeping AddRecs valid),
// SCEVFindInsideScop (detects SCEV sub-expressions defined inside the SCoP
// that are not remapped), and Scop::getRepresentingInvariantLoadSCEV (maps
// a SCEV onto its invariant-load equivalence-class representative).
// Extraction damage: numbering jumps (1977->1980, 1999->2004, 2007->2011,
// 2016->2018, 2038->2042) show dropped lines -- presumably 'public:'
// markers, a 'Scop *S;' member, the fall-through 'return E;' statements,
// the visitAll() call, and closing '};' braces. Tokens preserved unchanged.
1974 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1975 struct SCEVSensitiveParameterRewriter
1976 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
1977 ValueToValueMap
&VMap
;
1980 SCEVSensitiveParameterRewriter(ValueToValueMap
&VMap
, ScalarEvolution
&SE
)
1981 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
1983 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
1984 ValueToValueMap
&VMap
) {
1985 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
1986 return SSPR
.visit(E
);
1989 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
1990 auto *Start
= visit(E
->getStart());
1991 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
1992 visit(E
->getStepRecurrence(SE
)),
1993 E
->getLoop(), SCEV::FlagAnyWrap
);
1994 return SE
.getAddExpr(Start
, AddRec
);
1997 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
1998 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
1999 return SE
.getUnknown(NewValue
);
2004 /// Check whether we should remap a SCEV expression.
2005 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
2006 ValueToValueMap
&VMap
;
2007 bool FoundInside
= false;
2011 SCEVFindInsideScop(ValueToValueMap
&VMap
, ScalarEvolution
&SE
, Scop
*S
)
2012 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
2014 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
2015 ValueToValueMap
&VMap
, Scop
*S
) {
2016 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
2018 return SFIS
.FoundInside
;
2021 bool follow(const SCEV
*E
) {
2022 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
2023 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
2024 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
2025 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
2026 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
2028 return !FoundInside
;
2030 bool isDone() { return FoundInside
; }
2034 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) {
2035 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
2036 // doesn't like addition between an AddRec and an expression that
2037 // doesn't have a dominance relationship with it.)
2038 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
2042 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
// NOTE(review): Parameter-naming machinery -- the KnownNames table maps
// mangled OpenCL builtin names to readable ones, getCallParamName() builds a
// name from a call and its constant arguments, and Scop::createParameterId()
// allocates the isl_id for a newly added parameter. Extraction damage:
// numbering jumps (2059->2061, 2070->2076, 2090->2093, 2097->2099,
// 2099->2104) show dropped lines -- presumably the 'Result' declaration,
// 'return OS.str();', an 'if (Val->hasName())' guard, the 'ParameterName +='
// left-hand side, and closing braces. Tokens preserved unchanged.
2045 // This table of function names is used to translate parameter names in more
2046 // human-readable names. This makes it easier to interpret Polly analysis
2048 StringMap
<std::string
> KnownNames
= {
2049 {"_Z13get_global_idj", "global_id"},
2050 {"_Z12get_local_idj", "local_id"},
2051 {"_Z15get_global_sizej", "global_size"},
2052 {"_Z14get_local_sizej", "local_size"},
2053 {"_Z12get_work_dimv", "work_dim"},
2054 {"_Z17get_global_offsetj", "global_offset"},
2055 {"_Z12get_group_idj", "group_id"},
2056 {"_Z14get_num_groupsj", "num_groups"},
2059 static std::string
getCallParamName(CallInst
*Call
) {
2061 raw_string_ostream
OS(Result
);
2062 std::string Name
= Call
->getCalledFunction()->getName();
2064 auto Iterator
= KnownNames
.find(Name
);
2065 if (Iterator
!= KnownNames
.end())
2066 Name
= "__" + Iterator
->getValue();
2068 for (auto &Operand
: Call
->arg_operands()) {
2069 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
2070 OS
<< "_" << Op
->getValue();
2076 void Scop::createParameterId(const SCEV
*Parameter
) {
2077 assert(Parameters
.count(Parameter
));
2078 assert(!ParameterIds
.count(Parameter
));
2080 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
2082 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
2083 Value
*Val
= ValueParameter
->getValue();
2084 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
2086 if (Call
&& isConstCall(Call
)) {
2087 ParameterName
= getCallParamName(Call
);
2088 } else if (UseInstructionNames
) {
2089 // If this parameter references a specific Value and this value has a name
2090 // we use this name as it is likely to be unique and more useful than just
2093 ParameterName
= Val
->getName();
2094 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
2095 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
2096 if (LoadOrigin
->hasName()) {
2097 ParameterName
+= "_loaded_from_";
2099 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
2104 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2107 auto *Id
= isl_id_alloc(getIslCtx(), ParameterName
.c_str(),
2108 const_cast<void *>((const void *)Parameter
));
2109 ParameterIds
[Parameter
] = Id
;
2112 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2113 for (const SCEV
*Parameter
: NewParameters
) {
2114 // Normalize the SCEV to get the representing element for an invariant load.
2115 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2116 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2118 if (Parameters
.insert(Parameter
))
2119 createParameterId(Parameter
);
2123 __isl_give isl_id
*Scop::getIdForParam(const SCEV
*Parameter
) {
2124 // Normalize the SCEV to get the representing element for an invariant load.
2125 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2126 return isl_id_copy(ParameterIds
.lookup(Parameter
));
2129 __isl_give isl_set
*
2130 Scop::addNonEmptyDomainConstraints(__isl_take isl_set
*C
) const {
2131 isl_set
*DomainContext
= isl_union_set_params(getDomains());
2132 return isl_set_intersect_params(C
, DomainContext
);
2135 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2136 return DT
.dominates(BB
, getEntry());
// NOTE(review): Three Scop members -- addUserAssumptions() folds affine
// llvm.assume conditions into the context, addUserContext() intersects the
// context with the set given via -polly-context (after validating dimension
// count and parameter names), and buildInvariantEquivalenceClasses() groups
// required invariant loads by (pointer SCEV, type). Extraction damage:
// many numbering jumps (e.g. 2144->2147, 2175->2181, 2213->2216, 2228->2232,
// 2268->2270, 2270->2275) show dropped lines -- presumably 'continue'/
// 'return' statements, a 'LoopInfo &LI' parameter fragment, the
// 'if (ConditionSets.size() == 2)'-style branch heads, left-hand sides of
// assignments, and 'if (ClassRep) { ...; continue; }' plus closing braces.
// Tokens preserved unchanged; restore dropped lines before editing.
2139 void Scop::addUserAssumptions(AssumptionCache
&AC
, DominatorTree
&DT
,
2141 auto &F
= getFunction();
2142 for (auto &Assumption
: AC
.assumptions()) {
2143 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2144 if (!CI
|| CI
->getNumArgOperands() != 1)
2147 bool InScop
= contains(CI
);
2148 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2151 auto *L
= LI
.getLoopFor(CI
->getParent());
2152 auto *Val
= CI
->getArgOperand(0);
2153 ParameterSetTy DetectedParams
;
2154 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2155 emitOptimizationRemarkAnalysis(F
.getContext(), DEBUG_TYPE
, F
,
2157 "Non-affine user assumption ignored.");
2161 // Collect all newly introduced parameters.
2162 ParameterSetTy NewParams
;
2163 for (auto *Param
: DetectedParams
) {
2164 Param
= extractConstantFactor(Param
, *SE
).second
;
2165 Param
= getRepresentingInvariantLoadSCEV(Param
);
2166 if (Parameters
.count(Param
))
2168 NewParams
.insert(Param
);
2171 SmallVector
<isl_set
*, 2> ConditionSets
;
2172 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2173 auto &Stmt
= InScop
? *getStmtFor(CI
->getParent()) : *Stmts
.begin();
2174 auto *Dom
= InScop
? getDomainConditions(&Stmt
) : isl_set_copy(Context
);
2175 bool Valid
= buildConditionSets(Stmt
, Val
, TI
, L
, Dom
, ConditionSets
);
2181 isl_set
*AssumptionCtx
= nullptr;
2183 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2184 isl_set_free(ConditionSets
[0]);
2186 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2187 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2190 // Project out newly introduced parameters as they are not otherwise useful.
2191 if (!NewParams
.empty()) {
2192 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2193 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2194 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2197 if (!NewParams
.count(Param
))
2201 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2205 emitOptimizationRemarkAnalysis(
2206 F
.getContext(), DEBUG_TYPE
, F
, CI
->getDebugLoc(),
2207 "Use user assumption: " + stringFromIslObj(AssumptionCtx
));
2208 Context
= isl_set_intersect(Context
, AssumptionCtx
);
2212 void Scop::addUserContext() {
2213 if (UserContextStr
.empty())
2216 isl_set
*UserContext
=
2217 isl_set_read_from_str(getIslCtx(), UserContextStr
.c_str());
2218 isl_space
*Space
= getParamSpace();
2219 if (isl_space_dim(Space
, isl_dim_param
) !=
2220 isl_set_dim(UserContext
, isl_dim_param
)) {
2221 auto SpaceStr
= isl_space_to_str(Space
);
2222 errs() << "Error: the context provided in -polly-context has not the same "
2223 << "number of dimensions than the computed context. Due to this "
2224 << "mismatch, the -polly-context option is ignored. Please provide "
2225 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2227 isl_set_free(UserContext
);
2228 isl_space_free(Space
);
2232 for (unsigned i
= 0; i
< isl_space_dim(Space
, isl_dim_param
); i
++) {
2233 auto *NameContext
= isl_set_get_dim_name(Context
, isl_dim_param
, i
);
2234 auto *NameUserContext
= isl_set_get_dim_name(UserContext
, isl_dim_param
, i
);
2236 if (strcmp(NameContext
, NameUserContext
) != 0) {
2237 auto SpaceStr
= isl_space_to_str(Space
);
2238 errs() << "Error: the name of dimension " << i
2239 << " provided in -polly-context "
2240 << "is '" << NameUserContext
<< "', but the name in the computed "
2241 << "context is '" << NameContext
2242 << "'. Due to this name mismatch, "
2243 << "the -polly-context option is ignored. Please provide "
2244 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2246 isl_set_free(UserContext
);
2247 isl_space_free(Space
);
2252 isl_set_set_dim_id(UserContext
, isl_dim_param
, i
,
2253 isl_space_get_dim_id(Space
, isl_dim_param
, i
));
2256 Context
= isl_set_intersect(Context
, UserContext
);
2257 isl_space_free(Space
);
2260 void Scop::buildInvariantEquivalenceClasses() {
2261 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2263 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2264 for (LoadInst
*LInst
: RIL
) {
2265 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2267 Type
*Ty
= LInst
->getType();
2268 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2270 InvEquivClassVMap
[LInst
] = ClassRep
;
2275 InvariantEquivClasses
.emplace_back(
2276 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2280 void Scop::buildContext() {
2281 isl_space
*Space
= isl_space_params_alloc(getIslCtx(), 0);
2282 Context
= isl_set_universe(isl_space_copy(Space
));
2283 InvalidContext
= isl_set_empty(isl_space_copy(Space
));
2284 AssumedContext
= isl_set_universe(Space
);
// NOTE(review): Four pieces -- Scop::addParameterBounds() adds the signed
// ranges of all parameters to the context, addFortranArrayOutermostDimParams()
// appends the outermost-dimension-size ids of Fortran arrays as parameters,
// Scop::realignParams() aligns all structures to one common parameter space,
// and simplifyAssumptionContext() gists an assumption set against the domain
// parameters and the context. Extraction damage: numbering jumps
// (2287->2289, 2292->2297, 2313->2317, 2331->2337, 2342->2345, 2367->2369,
// 2378->2381) show dropped lines -- presumably the 'unsigned PDim = 0;'
// counters, '.release();' continuations, null-PwAff guards, returns,
// assignment left-hand sides ('AssumptionContext ='), the 'const Scop &S'
// parameter fragment, and closing braces. Tokens preserved unchanged.
2287 void Scop::addParameterBounds() {
2289 for (auto *Parameter
: Parameters
) {
2290 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2292 addRangeBoundsToSet(give(Context
), SRange
, PDim
++, isl::dim::param
)
2297 // We use the outermost dimension to generate GPU transfers for Fortran arrays
2298 // even when the array bounds are not known statically. To do so, we need the
2299 // outermost dimension information. We add this into the context so that the
2300 // outermost dimension is available during codegen.
2301 // We currently do not care about dimensions other than the outermost
2302 // dimension since it doesn't affect transfers.
2303 static isl_set
*addFortranArrayOutermostDimParams(__isl_give isl_set
*Context
,
2304 Scop::array_range Arrays
) {
2306 std::vector
<isl_id
*> OutermostSizeIds
;
2307 for (auto Array
: Arrays
) {
2308 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2309 // for its outermost dimension. Fortran arrays will have this since the
2310 // outermost dimension size can be picked up from their runtime description.
2311 // TODO: actually need to check if it has a FAD, but for now this works.
2312 if (Array
->getNumberOfDimensions() > 0) {
2313 isl_pw_aff
*PwAff
= Array
->getDimensionSizePw(0);
2317 isl_id
*Id
= isl_pw_aff_get_dim_id(PwAff
, isl_dim_param
, 0);
2318 isl_pw_aff_free(PwAff
);
2319 assert(Id
&& "Invalid Id for PwAff expression in Fortran array");
2320 OutermostSizeIds
.push_back(Id
);
2324 const int NumTrueParams
= isl_set_dim(Context
, isl_dim_param
);
2325 Context
= isl_set_add_dims(Context
, isl_dim_param
, OutermostSizeIds
.size());
2327 for (size_t i
= 0; i
< OutermostSizeIds
.size(); i
++) {
2328 Context
= isl_set_set_dim_id(Context
, isl_dim_param
, NumTrueParams
+ i
,
2329 OutermostSizeIds
[i
]);
2331 isl_set_lower_bound_si(Context
, isl_dim_param
, NumTrueParams
+ i
, 0);
2337 void Scop::realignParams() {
2338 if (PollyIgnoreParamBounds
)
2341 // Add all parameters into a common model.
2342 isl_space
*Space
= isl_space_params_alloc(getIslCtx(), ParameterIds
.size());
2345 for (const auto *Parameter
: Parameters
) {
2346 isl_id
*id
= getIdForParam(Parameter
);
2347 Space
= isl_space_set_dim_id(Space
, isl_dim_param
, PDim
++, id
);
2350 // Align the parameters of all data structures to the model.
2351 Context
= isl_set_align_params(Context
, Space
);
2353 // Add the outermost dimension of the Fortran arrays into the Context.
2354 // See the description of the function for more information.
2355 Context
= addFortranArrayOutermostDimParams(Context
, arrays());
2357 // As all parameters are known add bounds to them.
2358 addParameterBounds();
2360 for (ScopStmt
&Stmt
: *this)
2361 Stmt
.realignParams();
2362 // Simplify the schedule according to the context too.
2363 Schedule
= isl_schedule_gist_domain_params(Schedule
, getContext());
2366 static __isl_give isl_set
*
2367 simplifyAssumptionContext(__isl_take isl_set
*AssumptionContext
,
2369 // If we have modeled all blocks in the SCoP that have side effects we can
2370 // simplify the context with the constraints that are needed for anything to
2371 // be executed at all. However, if we have error blocks in the SCoP we already
2372 // assumed some parameter combinations cannot occur and removed them from the
2373 // domains, thus we cannot use the remaining domain to simplify the
2375 if (!S
.hasErrorBlock()) {
2376 isl_set
*DomainParameters
= isl_union_set_params(S
.getDomains());
2378 isl_set_gist_params(AssumptionContext
, DomainParameters
);
2381 AssumptionContext
= isl_set_gist_params(AssumptionContext
, S
.getContext());
2382 return AssumptionContext
;
2385 void Scop::simplifyContexts() {
2386 // The parameter constraints of the iteration domains give us a set of
2387 // constraints that need to hold for all cases where at least a single
2388 // statement iteration is executed in the whole scop. We now simplify the
2389 // assumed context under the assumption that such constraints hold and at
2390 // least a single statement iteration is executed. For cases where no
2391 // statement instances are executed, the assumptions we have taken about
2392 // the executed code do not matter and can be changed.
2394 // WARNING: This only holds if the assumptions we have taken do not reduce
2395 // the set of statement instances that are executed. Otherwise we
2396 // may run into a case where the iteration domains suggest that
2397 // for a certain set of parameter constraints no code is executed,
2398 // but in the original program some computation would have been
2399 // performed. In such a case, modifying the run-time conditions and
2400 // possibly influencing the run-time check may cause certain scops
2401 // to not be executed.
2405 // When delinearizing the following code:
2407 // for (long i = 0; i < 100; i++)
2408 // for (long j = 0; j < m; j++)
2411 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2412 // otherwise we would access out of bound data. Now, knowing that code is
2413 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2414 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2415 InvalidContext
= isl_set_align_params(InvalidContext
, getParamSpace());
// NOTE(review): buildMinMaxAccess() computes, for an access-location set,
// the lexicographic minimal and maximal accessed element (the max bumped by
// one in its last dimension to form a half-open bound) and appends the pair
// to MinMaxAccesses; it bails out (isl::stat::error) when the computation
// would be too expensive. Extraction damage: numbering jumps (2418->2420,
// 2422->2425, 2449->2452, 2472->2473) show dropped lines -- presumably the
// 'static isl::stat' return-type line, declarations of 'OneAff'/'Pos', the
// 'InvolvedParams++;' statement, timing-table comment rows, and closing
// braces. Tokens preserved unchanged.
2418 /// Add the minimal/maximal access in @p Set to @p User.
2420 buildMinMaxAccess(isl::set Set
, Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2421 isl::pw_multi_aff MinPMA
, MaxPMA
;
2422 isl::pw_aff LastDimAff
;
2425 isl::ctx Ctx
= Set
.get_ctx();
2427 Set
= Set
.remove_divs();
2429 if (isl_set_n_basic_set(Set
.get()) >= MaxDisjunctsInDomain
)
2430 return isl::stat::error
;
2432 // Restrict the number of parameters involved in the access as the lexmin/
2433 // lexmax computation will take too long if this number is high.
2435 // Experiments with a simple test case using an i7 4800MQ:
2437 // #Parameters involved | Time (in sec)
2446 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2447 unsigned InvolvedParams
= 0;
2448 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2449 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2452 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2453 return isl::stat::error
;
2456 if (isl_set_n_basic_set(Set
.get()) > RunTimeChecksMaxAccessDisjuncts
)
2457 return isl::stat::error
;
2459 MinPMA
= Set
.lexmin_pw_multi_aff();
2460 MaxPMA
= Set
.lexmax_pw_multi_aff();
2462 if (isl_ctx_last_error(Ctx
.get()) == isl_error_quota
)
2463 return isl::stat::error
;
2465 MinPMA
= MinPMA
.coalesce();
2466 MaxPMA
= MaxPMA
.coalesce();
2468 // Adjust the last dimension of the maximal access by one as we want to
2469 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2470 // we test during code generation might now point after the end of the
2471 // allocated array but we will never dereference it anyway.
2472 assert(MaxPMA
.dim(isl::dim::out
) && "Assumed at least one output dimension");
2473 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2474 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2475 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2476 OneAff
= OneAff
.add_constant_si(1);
2477 LastDimAff
= LastDimAff
.add(OneAff
);
2478 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2480 MinMaxAccesses
.push_back(std::make_pair(MinPMA
.copy(), MaxPMA
.copy()));
2482 return isl::stat::ok
;
2485 static __isl_give isl_set
*getAccessDomain(MemoryAccess
*MA
) {
2486 isl_set
*Domain
= MA
->getStatement()->getDomain();
2487 Domain
= isl_set_project_out(Domain
, isl_dim_set
, 0, isl_set_n_dim(Domain
));
2488 return isl_set_reset_tuple_id(Domain
);
2491 /// Wrapper function to calculate minimal/maximal accesses to each array.
2492 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2493 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2495 MinMaxAccesses
.reserve(AliasGroup
.size());
2497 isl::union_set Domains
= give(S
.getDomains());
2498 isl::union_map Accesses
= isl::union_map::empty(give(S
.getParamSpace()));
2500 for (MemoryAccess
*MA
: AliasGroup
)
2501 Accesses
= Accesses
.add_map(give(MA
->getAccessRelation()));
2503 Accesses
= Accesses
.intersect_domain(Domains
);
2504 isl::union_set Locations
= Accesses
.range();
2505 Locations
= Locations
.coalesce();
2506 Locations
= Locations
.detect_equalities();
2508 auto Lambda
= [&MinMaxAccesses
, &S
](isl::set Set
) -> isl::stat
{
2509 return buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2511 return Locations
.foreach_set(Lambda
) == isl::stat::ok
;
2514 /// Helper to treat non-affine regions and basic blocks the same.
2518 /// Return the block that is the representing block for @p RN.
2519 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2520 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2521 : RN
->getNodeAs
<BasicBlock
>();
2524 /// Return the @p idx'th block that is executed after @p RN.
2525 static inline BasicBlock
*
2526 getRegionNodeSuccessor(RegionNode
*RN
, TerminatorInst
*TI
, unsigned idx
) {
2527 if (RN
->isSubRegion()) {
2529 return RN
->getNodeAs
<Region
>()->getExit();
2531 return TI
->getSuccessor(idx
);
2534 /// Return the smallest loop surrounding @p RN.
2535 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2536 if (!RN
->isSubRegion()) {
2537 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2538 Loop
*L
= LI
.getLoopFor(BB
);
2540 // Unreachable statements are not considered to belong to a LLVM loop, as
2541 // they are not part of an actual loop in the control flow graph.
2542 // Nevertheless, we handle certain unreachable statements that are common
2543 // when modeling run-time bounds checks as being part of the loop to be
2544 // able to model them and to later eliminate the run-time bounds checks.
2546 // Specifically, for basic blocks that terminate in an unreachable and
2547 // where the immediate predecessor is part of a loop, we assume these
2548 // basic blocks belong to the loop the predecessor belongs to. This
2549 // allows us to model the following code.
2551 // for (i = 0; i < N; i++) {
2553 // abort(); <- this abort might be translated to an
2558 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2559 L
= LI
.getLoopFor(BB
->getPrevNode());
2563 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2564 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2565 while (L
&& NonAffineSubRegion
->contains(L
))
2566 L
= L
->getParentLoop();
2570 /// Get the number of blocks in @p L.
2572 /// The number of blocks in a loop are the number of basic blocks actually
2573 /// belonging to the loop, as well as all single basic blocks that the loop
2574 /// exits to and which terminate in an unreachable instruction. We do not
2575 /// allow such basic blocks in the exit of a scop, hence they belong to the
2576 /// scop and represent run-time conditions which we want to model and
2577 /// subsequently speculate away.
2579 /// @see getRegionNodeLoop for additional details.
2580 unsigned getNumBlocksInLoop(Loop
*L
) {
2581 unsigned NumBlocks
= L
->getNumBlocks();
2582 SmallVector
<llvm::BasicBlock
*, 4> ExitBlocks
;
2583 L
->getExitBlocks(ExitBlocks
);
2585 for (auto ExitBlock
: ExitBlocks
) {
2586 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
2592 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2593 if (!RN
->isSubRegion())
2596 Region
*R
= RN
->getNodeAs
<Region
>();
2597 return std::distance(R
->block_begin(), R
->block_end());
2600 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2601 const DominatorTree
&DT
) {
2602 if (!RN
->isSubRegion())
2603 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2604 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2605 if (isErrorBlock(*BB
, R
, LI
, DT
))
2612 static inline __isl_give isl_set
*addDomainDimId(__isl_take isl_set
*Domain
,
2613 unsigned Dim
, Loop
*L
) {
2614 Domain
= isl_set_lower_bound_si(Domain
, isl_dim_set
, Dim
, -1);
2616 isl_id_alloc(isl_set_get_ctx(Domain
), nullptr, static_cast<void *>(L
));
2617 return isl_set_set_dim_id(Domain
, isl_dim_set
, Dim
, DimId
);
2620 __isl_give isl_set
*Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2621 return getDomainConditions(Stmt
->getEntryBlock());
2624 __isl_give isl_set
*Scop::getDomainConditions(BasicBlock
*BB
) const {
2625 auto DIt
= DomainMap
.find(BB
);
2626 if (DIt
!= DomainMap
.end())
2627 return isl_set_copy(DIt
->getSecond());
2629 auto &RI
= *R
.getRegionInfo();
2630 auto *BBR
= RI
.getRegionFor(BB
);
2631 while (BBR
->getEntry() == BB
)
2632 BBR
= BBR
->getParent();
2633 return getDomainConditions(BBR
->getEntry());
// NOTE(review): Scop::buildDomains() seeds the entry block's domain and then
// delegates to buildDomainsWithBranchConstraints / propagateDomainConstraints
// / propagateInvalidStmtDomains; adjustDomainDimensions() re-dimensions a
// domain set when moving between loops of differing depth. Extraction
// damage: numbering jumps (2642->2645, 2658->2661, 2676->2682, 2689->2691,
// 2691->2695, 2698->2701, 2720->2722, 2726 end) show dropped lines --
// presumably the 'while (LD-- >= 0)' loop head, 'return false;'/'return
// true;' statements, the 'if (NewL == OldL) return Dom;' guards, the 'else'
// head before the depth-decrease case, the final 'return Dom;', and closing
// braces. Tokens preserved unchanged.
2636 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
) {
2638 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2639 auto *EntryBB
= R
->getEntry();
2640 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2641 int LD
= getRelativeLoopDepth(L
);
2642 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD
+ 1));
2645 S
= addDomainDimId(S
, LD
+ 1, L
);
2646 L
= L
->getParentLoop();
2649 // Initialize the invalid domain.
2650 auto *EntryStmt
= getStmtFor(EntryBB
);
2651 EntryStmt
->setInvalidDomain(isl_set_empty(isl_set_get_space(S
)));
2653 DomainMap
[EntryBB
] = S
;
2655 if (IsOnlyNonAffineRegion
)
2656 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2658 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
))
2661 if (!propagateDomainConstraints(R
, DT
, LI
))
2664 // Error blocks and blocks dominated by them have been assumed to never be
2665 // executed. Representing them in the Scop does not add any value. In fact,
2666 // it is likely to cause issues during construction of the ScopStmts. The
2667 // contents of error blocks have not been verified to be expressible and
2668 // will cause problems when building up a ScopStmt for them.
2669 // Furthermore, basic blocks dominated by error blocks may reference
2670 // instructions in the error block which, if the error block is not modeled,
2671 // can themselves not be constructed properly. To this end we will replace
2672 // the domains of error blocks and those only reachable via error blocks
2673 // with an empty set. Additionally, we will record for each block under which
2674 // parameter combination it would be reached via an error block in its
2675 // InvalidDomain. This information is needed during load hoisting.
2676 if (!propagateInvalidStmtDomains(R
, DT
, LI
))
2682 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2683 /// to be compatible to domains constructed for loop @p NewL.
2685 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2686 /// edge from @p OldL to @p NewL.
2687 static __isl_give isl_set
*adjustDomainDimensions(Scop
&S
,
2688 __isl_take isl_set
*Dom
,
2689 Loop
*OldL
, Loop
*NewL
) {
2691 // If the loops are the same there is nothing to do.
2695 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2696 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2697 // If both loops are non-affine loops there is nothing to do.
2698 if (OldDepth
== -1 && NewDepth
== -1)
2701 // Distinguish three cases:
2702 // 1) The depth is the same but the loops are not.
2703 // => One loop was left one was entered.
2704 // 2) The depth increased from OldL to NewL.
2705 // => One loop was entered, none was left.
2706 // 3) The depth decreased from OldL to NewL.
2707 // => Loops were left were difference of the depths defines how many.
2708 if (OldDepth
== NewDepth
) {
2709 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2710 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NewDepth
, 1);
2711 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2712 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2713 } else if (OldDepth
< NewDepth
) {
2714 assert(OldDepth
+ 1 == NewDepth
);
2715 auto &R
= S
.getRegion();
2717 assert(NewL
->getParentLoop() == OldL
||
2718 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2719 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2720 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2722 assert(OldDepth
> NewDepth
);
2723 int Diff
= OldDepth
- NewDepth
;
2724 int NumDim
= isl_set_n_dim(Dom
);
2725 assert(NumDim
>= Diff
);
2726 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NumDim
- Diff
, Diff
);
// Propagate the "invalid" domains (iterations that must not be executed,
// e.g. because they reach an error block) forward through the region in
// reverse post order, so each statement's invalid domain also covers invalid
// iterations inherited from its predecessors.
// NOTE(review): the extraction is missing lines (parameter list tail,
// early "continue"/"return" statements and closing braces); comments
// annotate only the visible code.
2732 bool Scop::propagateInvalidStmtDomains(Region
*R
, DominatorTree
&DT
,
2734 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2735 for (auto *RN
: RTraversal
) {
2737 // Recurse for affine subregions but go on for basic blocks and non-affine
2739 if (RN
->isSubRegion()) {
2740 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2741 if (!isNonAffineSubRegion(SubRegion
)) {
2742 propagateInvalidStmtDomains(SubRegion
, DT
, LI
);
2747 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2748 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2749 ScopStmt
*Stmt
= getStmtFor(BB
);
2750 isl_set
*&Domain
= DomainMap
[BB
];
2751 assert(Domain
&& "Cannot propagate a nullptr");
2753 auto *InvalidDomain
= Stmt
->getInvalidDomain();
// A block is fully invalid if it contains an error block or its whole
// domain is already covered by the invalid domain.
2754 bool IsInvalidBlock
=
2755 ContainsErrorBlock
|| isl_set_is_subset(Domain
, InvalidDomain
);
2757 if (!IsInvalidBlock
) {
2758 InvalidDomain
= isl_set_intersect(InvalidDomain
, isl_set_copy(Domain
));
// Fully invalid block: its entire domain becomes the invalid domain and
// we record an assumption that restricts the parameter values reaching it.
2760 isl_set_free(InvalidDomain
);
2761 InvalidDomain
= Domain
;
2762 isl_set
*DomPar
= isl_set_params(isl_set_copy(Domain
));
2763 recordAssumption(ERRORBLOCK
, DomPar
, BB
->getTerminator()->getDebugLoc(),
2768 if (isl_set_is_empty(InvalidDomain
)) {
2769 Stmt
->setInvalidDomain(InvalidDomain
);
// Propagate the (non-empty) invalid domain to all successors inside the
// SCoP, adjusting dimensionality when crossing loop boundaries.
2773 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2774 auto *TI
= BB
->getTerminator();
2775 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2776 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2777 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2778 auto *SuccStmt
= getStmtFor(SuccBB
);
2780 // Skip successors outside the SCoP.
// Skip back edges (successor dominates this block).
2785 if (DT
.dominates(SuccBB
, BB
))
2788 auto *SuccBBLoop
= SuccStmt
->getSurroundingLoop();
2789 auto *AdjustedInvalidDomain
= adjustDomainDimensions(
2790 *this, isl_set_copy(InvalidDomain
), BBLoop
, SuccBBLoop
);
2791 auto *SuccInvalidDomain
= SuccStmt
->getInvalidDomain();
2793 isl_set_union(SuccInvalidDomain
, AdjustedInvalidDomain
);
2794 SuccInvalidDomain
= isl_set_coalesce(SuccInvalidDomain
);
2795 unsigned NumConjucts
= isl_set_n_basic_set(SuccInvalidDomain
);
2796 SuccStmt
->setInvalidDomain(SuccInvalidDomain
);
2798 // Check if the maximal number of domain disjunctions was reached.
2799 // In case this happens we will bail.
2800 if (NumConjucts
< MaxDisjunctsInDomain
)
2803 isl_set_free(InvalidDomain
);
2804 invalidate(COMPLEXITY
, TI
->getDebugLoc());
2808 Stmt
->setInvalidDomain(InvalidDomain
);
// If @p BB is the entry of a (non-trivial) region whose exit lies inside the
// SCoP, propagate BB's domain directly to that exit block and remember the
// exit in @p FinishedExitBlocks so later successor processing can skip it.
// NOTE(review): extraction is missing lines (e.g. the declaration of `L`
// used in the while loop, early returns, closing braces); comments annotate
// only the visible code.
2814 void Scop::propagateDomainConstraintsToRegionExit(
2815 BasicBlock
*BB
, Loop
*BBLoop
,
2816 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
) {
2818 // Check if the block @p BB is the entry of a region. If so we propagate it's
2819 // domain to the exit block of the region. Otherwise we are done.
2820 auto *RI
= R
.getRegionInfo();
2821 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2822 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2823 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2826 // Do not propagate the domain if there is a loop backedge inside the region
2827 // that would prevent the exit block from being executed.
2829 while (L
&& contains(L
)) {
2830 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2831 BBLoop
->getLoopLatches(LatchBBs
);
2832 for (auto *LatchBB
: LatchBBs
)
2833 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2835 L
= L
->getParentLoop();
2838 auto *Domain
= DomainMap
[BB
];
2839 assert(Domain
&& "Cannot propagate a nullptr");
2841 auto *ExitStmt
= getStmtFor(ExitBB
);
2842 auto *ExitBBLoop
= ExitStmt
->getSurroundingLoop();
2844 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2845 // adjust the domain before we can propagate it.
2846 auto *AdjustedDomain
=
2847 adjustDomainDimensions(*this, isl_set_copy(Domain
), BBLoop
, ExitBBLoop
);
2848 auto *&ExitDomain
= DomainMap
[ExitBB
];
2850 // If the exit domain is not yet created we set it otherwise we "add" the
// union the adjusted domain with what other paths already contributed.
2853 ExitDomain
? isl_set_union(AdjustedDomain
, ExitDomain
) : AdjustedDomain
;
2855 // Initialize the invalid domain.
2856 ExitStmt
->setInvalidDomain(isl_set_empty(isl_set_get_space(ExitDomain
)));
2858 FinishedExitBlocks
.insert(ExitBB
);
// Build an initial domain for every block in @p R by pushing each block's
// domain to its successors under the branch condition that selects that
// successor. Returns false if the SCoP has to be invalidated.
// NOTE(review): extraction is missing lines (parameter list tail, early
// continues/returns, closing braces); comments annotate only the visible
// code.
2861 bool Scop::buildDomainsWithBranchConstraints(Region
*R
, DominatorTree
&DT
,
2863 // To create the domain for each block in R we iterate over all blocks and
2864 // subregions in R and propagate the conditions under which the current region
2865 // element is executed. To this end we iterate in reverse post order over R as
2866 // it ensures that we first visit all predecessors of a region node (either a
2867 // basic block or a subregion) before we visit the region node itself.
2868 // Initially, only the domain for the SCoP region entry block is set and from
2869 // there we propagate the current domain to all successors, however we add the
2870 // condition that the successor is actually executed next.
2871 // As we are only interested in non-loop carried constraints here we can
2872 // simply skip loop back edges.
2874 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2875 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2876 for (auto *RN
: RTraversal
) {
2878 // Recurse for affine subregions but go on for basic blocks and non-affine
2880 if (RN
->isSubRegion()) {
2881 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2882 if (!isNonAffineSubRegion(SubRegion
)) {
2883 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
))
2889 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
2890 HasErrorBlock
= true;
2892 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2893 TerminatorInst
*TI
= BB
->getTerminator();
// Unreachable terminators have no successors to propagate to.
2895 if (isa
<UnreachableInst
>(TI
))
2898 isl_set
*Domain
= DomainMap
.lookup(BB
);
// Track the deepest loop nest seen so far in the SCoP.
2901 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
));
2903 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2904 // Propagate the domain from BB directly to blocks that have a superset
2905 // domain, at the moment only region exit nodes of regions that start in BB.
2906 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
);
2908 // If all successors of BB have been set a domain through the propagation
2909 // above we do not need to build condition sets but can just skip this
2910 // block. However, it is important to note that this is a local property
2911 // with regards to the region @p R. To this end FinishedExitBlocks is a
2913 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
2914 return FinishedExitBlocks
.count(SuccBB
);
2916 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
2919 // Build the condition sets for the successor nodes of the current region
2920 // node. If it is a non-affine subregion we will always execute the single
2921 // exit node, hence the single entry node domain is the condition set. For
2922 // basic blocks we use the helper function buildConditionSets.
2923 SmallVector
<isl_set
*, 8> ConditionSets
;
2924 if (RN
->isSubRegion())
2925 ConditionSets
.push_back(isl_set_copy(Domain
));
2926 else if (!buildConditionSets(*getStmtFor(BB
), TI
, BBLoop
, Domain
,
2930 // Now iterate over the successors and set their initial domain based on
2931 // their condition set. We skip back edges here and have to be careful when
2932 // we leave a loop not to keep constraints over a dimension that doesn't
2934 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
2935 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
2936 isl_set
*CondSet
= ConditionSets
[u
];
2937 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2939 auto *SuccStmt
= getStmtFor(SuccBB
);
2940 // Skip blocks outside the region.
2942 isl_set_free(CondSet
);
2946 // If we propagate the domain of some block to "SuccBB" we do not have to
2947 // adjust the domain.
2948 if (FinishedExitBlocks
.count(SuccBB
)) {
2949 isl_set_free(CondSet
);
// Skip back edges; loop-carried constraints are added later.
2954 if (DT
.dominates(SuccBB
, BB
)) {
2955 isl_set_free(CondSet
);
2959 auto *SuccBBLoop
= SuccStmt
->getSurroundingLoop();
2960 CondSet
= adjustDomainDimensions(*this, CondSet
, BBLoop
, SuccBBLoop
);
2962 // Set the domain for the successor or merge it with an existing domain in
2963 // case there are multiple paths (without loop back edges) to the
2965 isl_set
*&SuccDomain
= DomainMap
[SuccBB
];
2968 SuccDomain
= isl_set_coalesce(isl_set_union(SuccDomain
, CondSet
));
2970 // Initialize the invalid domain.
2971 SuccStmt
->setInvalidDomain(isl_set_empty(isl_set_get_space(CondSet
)));
2972 SuccDomain
= CondSet
;
2975 SuccDomain
= isl_set_detect_equalities(SuccDomain
);
2977 // Check if the maximal number of domain disjunctions was reached.
2978 // In case this happens we will clean up and bail.
2979 if (isl_set_n_basic_set(SuccDomain
) < MaxDisjunctsInDomain
)
// Too complex: invalidate the SCoP and free the remaining condition sets.
2982 invalidate(COMPLEXITY
, DebugLoc());
2983 while (++u
< ConditionSets
.size())
2984 isl_set_free(ConditionSets
[u
]);
// Collect the union of the domains of all predecessors of @p BB (adjusted
// to BB's dimensionality) — i.e. the conditions under which BB can be
// reached. @p Domain is only used for its space.
// NOTE(review): extraction is missing lines (back-edge `continue`s, the
// std::any_of call tail, `PredR = PredR->getParent()` in the while loop,
// the final return and closing brace); comments annotate only the visible
// code.
2992 __isl_give isl_set
*
2993 Scop::getPredecessorDomainConstraints(BasicBlock
*BB
,
2994 __isl_keep isl_set
*Domain
,
2995 DominatorTree
&DT
, LoopInfo
&LI
) {
2996 // If @p BB is the ScopEntry we are done
2997 if (R
.getEntry() == BB
)
2998 return isl_set_universe(isl_set_get_space(Domain
));
3000 // The region info of this function.
3001 auto &RI
= *R
.getRegionInfo();
3003 auto *BBLoop
= getStmtFor(BB
)->getSurroundingLoop();
3005 // A domain to collect all predecessor domains, thus all conditions under
3006 // which the block is executed. To this end we start with the empty domain.
3007 isl_set
*PredDom
= isl_set_empty(isl_set_get_space(Domain
));
3009 // Set of regions of which the entry block domain has been propagated to BB.
3010 // all predecessors inside any of the regions can be skipped.
3011 SmallSet
<Region
*, 8> PropagatedRegions
;
3013 for (auto *PredBB
: predecessors(BB
)) {
// Skip back edges (BB dominates the predecessor).
3015 if (DT
.dominates(BB
, PredBB
))
3018 // If the predecessor is in a region we used for propagation we can skip it.
3019 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
3020 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
3025 // Check if there is a valid region we can use for propagation, thus look
3026 // for a region that contains the predecessor and has @p BB as exit block.
3027 auto *PredR
= RI
.getRegionFor(PredBB
);
3028 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
3031 // If a valid region for propagation was found use the entry of that region
3032 // for propagation, otherwise the PredBB directly.
3033 if (PredR
->getExit() == BB
) {
3034 PredBB
= PredR
->getEntry();
3035 PropagatedRegions
.insert(PredR
);
3038 auto *PredBBDom
= getDomainConditions(PredBB
);
3039 auto *PredBBLoop
= getStmtFor(PredBB
)->getSurroundingLoop();
// Adjust the predecessor domain to BB's loop dimensionality before union.
3040 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
3042 PredDom
= isl_set_union(PredDom
, PredBBDom
);
// Refine each block's domain by intersecting it with the union of its
// predecessors' domains (pull-based, in reverse post order), and add
// loop-bound constraints at loop headers. Returns false on failure.
// NOTE(review): extraction is missing lines (parameter list tail, early
// returns/continues, closing braces); comments annotate only the visible
// code.
3048 bool Scop::propagateDomainConstraints(Region
*R
, DominatorTree
&DT
,
3050 // Iterate over the region R and propagate the domain constrains from the
3051 // predecessors to the current node. In contrast to the
3052 // buildDomainsWithBranchConstraints function, this one will pull the domain
3053 // information from the predecessors instead of pushing it to the successors.
3054 // Additionally, we assume the domains to be already present in the domain
3055 // map here. However, we iterate again in reverse post order so we know all
3056 // predecessors have been visited before a block or non-affine subregion is
3059 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
3060 for (auto *RN
: RTraversal
) {
3062 // Recurse for affine subregions but go on for basic blocks and non-affine
3064 if (RN
->isSubRegion()) {
3065 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
3066 if (!isNonAffineSubRegion(SubRegion
)) {
3067 if (!propagateDomainConstraints(SubRegion
, DT
, LI
))
3073 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
3074 isl_set
*&Domain
= DomainMap
[BB
];
3077 // Under the union of all predecessor conditions we can reach this block.
3078 auto *PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
3079 Domain
= isl_set_coalesce(isl_set_intersect(Domain
, PredDom
));
3080 Domain
= isl_set_align_params(Domain
, getParamSpace());
// For loop headers inside the SCoP, additionally constrain the domain by
// the loop bounds derived from the back-edge conditions.
3082 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
3083 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
3084 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
))
3091 /// Create a map to map from a given iteration to a subsequent iteration.
3093 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3094 /// is incremented by one and all other dimensions are equal, e.g.,
3095 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3097 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
// NOTE(review): the extraction appears to be missing the guard that skips
// dimension @p Dim inside the equate loop; comments annotate only the
// visible code.
3098 static __isl_give isl_map
*
3099 createNextIterationMap(__isl_take isl_space
*SetSpace
, unsigned Dim
) {
3100 auto *MapSpace
= isl_space_map_from_set(SetSpace
);
3101 auto *NextIterationMap
= isl_map_universe(isl_space_copy(MapSpace
));
// Equate input and output dimensions (except the one being incremented).
3102 for (unsigned u
= 0; u
< isl_map_dim(NextIterationMap
, isl_dim_in
); u
++)
3105 isl_map_equate(NextIterationMap
, isl_dim_in
, u
, isl_dim_out
, u
);
// Add the equality: in[Dim] + 1 == out[Dim] (constant 1, coeff +1 on the
// input dimension, -1 on the output dimension).
3106 auto *C
= isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace
));
3107 C
= isl_constraint_set_constant_si(C
, 1);
3108 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, Dim
, 1);
3109 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, Dim
, -1);
3110 NextIterationMap
= isl_map_add_constraint(NextIterationMap
, C
);
3111 return NextIterationMap
;
// Constrain the domain of the header of loop @p L using the back-edge
// conditions of all latches: iterations beyond the last one implied by the
// backedge-taken conditions are removed, and an unboundedness assumption is
// recorded when needed. Returns false on failure.
// NOTE(review): extraction is missing lines (several `continue`/`return`
// statements, an `else` keyword before the conditional-branch handling, and
// closing braces); comments annotate only the visible code.
3114 bool Scop::addLoopBoundsToHeaderDomain(Loop
*L
, LoopInfo
&LI
) {
3115 int LoopDepth
= getRelativeLoopDepth(L
);
3116 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
3118 BasicBlock
*HeaderBB
= L
->getHeader();
3119 assert(DomainMap
.count(HeaderBB
));
3120 isl_set
*&HeaderBBDom
= DomainMap
[HeaderBB
];
// Map [.., i_LoopDepth, ..] -> [.., i_LoopDepth + 1, ..].
3122 isl_map
*NextIterationMap
=
3123 createNextIterationMap(isl_set_get_space(HeaderBBDom
), LoopDepth
);
3125 isl_set
*UnionBackedgeCondition
=
3126 isl_set_empty(isl_set_get_space(HeaderBBDom
));
3128 SmallVector
<llvm::BasicBlock
*, 4> LatchBlocks
;
3129 L
->getLoopLatches(LatchBlocks
);
3131 for (BasicBlock
*LatchBB
: LatchBlocks
) {
3133 // If the latch is only reachable via error statements we skip it.
3134 isl_set
*LatchBBDom
= DomainMap
.lookup(LatchBB
);
3138 isl_set
*BackedgeCondition
= nullptr;
3140 TerminatorInst
*TI
= LatchBB
->getTerminator();
3141 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
3142 assert(BI
&& "Only branch instructions allowed in loop latches");
// Unconditional latch branch: the backedge is taken whenever the latch
// executes.
3144 if (BI
->isUnconditional())
3145 BackedgeCondition
= isl_set_copy(LatchBBDom
);
3147 SmallVector
<isl_set
*, 8> ConditionSets
;
3148 int idx
= BI
->getSuccessor(0) != HeaderBB
;
3149 if (!buildConditionSets(*getStmtFor(LatchBB
), TI
, L
, LatchBBDom
,
// Failure path: release what we allocated so far.
3151 isl_map_free(NextIterationMap
);
3152 isl_set_free(UnionBackedgeCondition
);
3156 // Free the non back edge condition set as we do not need it.
3157 isl_set_free(ConditionSets
[1 - idx
]);
3159 BackedgeCondition
= ConditionSets
[idx
];
// If the latch is in a deeper loop nest than the header, project out the
// extra dimensions before taking the union.
3162 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3163 assert(LatchLoopDepth
>= LoopDepth
);
3165 isl_set_project_out(BackedgeCondition
, isl_dim_set
, LoopDepth
+ 1,
3166 LatchLoopDepth
- LoopDepth
);
3167 UnionBackedgeCondition
=
3168 isl_set_union(UnionBackedgeCondition
, BackedgeCondition
);
// ForwardMap relates iterations with equal outer dimensions; it is used to
// spread "no further iteration" over all later iterations of this loop.
3171 isl_map
*ForwardMap
= isl_map_lex_le(isl_set_get_space(HeaderBBDom
));
3172 for (int i
= 0; i
< LoopDepth
; i
++)
3173 ForwardMap
= isl_map_equate(ForwardMap
, isl_dim_in
, i
, isl_dim_out
, i
);
3175 isl_set
*UnionBackedgeConditionComplement
=
3176 isl_set_complement(UnionBackedgeCondition
);
3177 UnionBackedgeConditionComplement
= isl_set_lower_bound_si(
3178 UnionBackedgeConditionComplement
, isl_dim_set
, LoopDepth
, 0);
3179 UnionBackedgeConditionComplement
=
3180 isl_set_apply(UnionBackedgeConditionComplement
, ForwardMap
);
3181 HeaderBBDom
= isl_set_subtract(HeaderBBDom
, UnionBackedgeConditionComplement
);
3182 HeaderBBDom
= isl_set_apply(HeaderBBDom
, NextIterationMap
);
// Split the domain into a bounded part (kept) and an unbounded part (used
// to derive an assumption).
3184 auto Parts
= partitionSetParts(HeaderBBDom
, LoopDepth
);
3185 HeaderBBDom
= Parts
.second
;
3187 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3188 // the bounded assumptions to the context as they are already implied by the
3190 if (Affinator
.hasNSWAddRecForLoop(L
)) {
3191 isl_set_free(Parts
.first
);
// Otherwise record the assumption that the loop is not infinite.
3195 isl_set
*UnboundedCtx
= isl_set_params(Parts
.first
);
3196 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3197 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
// Find the (array) memory access that defines the base pointer of @p MA,
// i.e. the access whose result instruction is MA's original base address.
// Returns the access or (in paths missing from this extraction) nullptr
// when the base pointer is not an instruction inside the SCoP.
3201 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3202 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3204 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3205 if (!PointerBaseInst
)
3208 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3212 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
// Return true if the base pointer of @p MA is defined inside the SCoP and
// cannot be hoisted out (w.r.t. the given @p Writes).
// NOTE(review): extraction is missing the final return statement and
// closing braces; comments annotate only the visible code.
3215 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3216 isl::union_map Writes
) {
// If the base pointer itself is loaded by some access, it is non-hoistable
// exactly when that load has no hoisting context.
3217 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3218 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
// Otherwise, a non-load instruction defining the base address inside the
// SCoP makes it non-hoistable.
3221 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3222 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3223 if (!isa
<LoadInst
>(BasePtrInst
))
3224 return contains(BasePtrInst
);
// Build runtime alias checks for this SCoP (if enabled). On failure the
// SCoP is invalidated and a diagnostic is emitted.
// NOTE(review): extraction is missing the early/success return statements
// and closing braces; comments annotate only the visible code.
3229 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3230 if (!PollyUseRuntimeAliasChecks
)
3233 if (buildAliasGroups(AA
)) {
3234 // Aliasing assumptions do not go through addAssumption but we still want to
3235 // collect statistics so we do it here explicitly.
3236 if (MinMaxAliasGroups
.size())
3237 AssumptionsAliasing
++;
3241 // If a problem occurs while building the alias groups we need to delete
3242 // this SCoP and pretend it wasn't valid in the first place. To this end
3243 // we make the assumed context infeasible.
3244 invalidate(ALIASING
, DebugLoc());
3246 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3247 << " could not be created as the number of parameters involved "
3248 "is too high. The SCoP will be "
3249 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3250 "the maximal number of parameters but be advised that the "
3251 "compile time might increase exponentially.\n\n");
// Partition all array accesses of the SCoP into alias groups using an
// AliasSetTracker, and collect the set of arrays that are written.
// Returns the pair (alias groups, written arrays).
// NOTE(review): extraction is missing lines (e.g. AST.add(...) calls, the
// AliasGroupTy AG declaration inside the alias-set loop, continues and
// closing braces); comments annotate only the visible code.
3255 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3256 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3257 AliasSetTracker
AST(AA
);
3259 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3260 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3261 for (ScopStmt
&Stmt
: *this) {
// Statements whose domain is empty are never executed and can be ignored.
3263 isl_set
*StmtDomain
= Stmt
.getDomain();
3264 bool StmtDomainEmpty
= isl_set_is_empty(StmtDomain
);
3265 isl_set_free(StmtDomain
);
3267 // Statements with an empty domain will never be executed.
3268 if (StmtDomainEmpty
)
3271 for (MemoryAccess
*MA
: Stmt
) {
3272 if (MA
->isScalarKind())
3275 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3276 MemAccInst
Acc(MA
->getAccessInstruction());
// For reads of a memtransfer intrinsic, the accessed pointer is the
// raw source operand rather than the destination.
3277 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3278 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3280 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
// Build one group per may-alias set; must-alias and forwarding sets need
// no runtime check.
3285 AliasGroupVectorTy AliasGroups
;
3286 for (AliasSet
&AS
: AST
) {
3287 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3291 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3294 AliasGroups
.push_back(std::move(AG
));
3297 return std::make_tuple(AliasGroups
, HasWriteAccess
);
// Split each alias group into subgroups whose accesses have overlapping
// execution domains: accesses with a domain disjoint from the group's
// running union are moved into a new group that is appended for later
// processing.
// NOTE(review): extraction is missing lines (the NewAG declaration, the
// ++AGI advance, and closing braces); comments annotate only the visible
// code.
3300 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3301 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3303 AliasGroupTy
&AG
= AliasGroups
[u
];
3304 AliasGroupTy::iterator AGI
= AG
.begin();
3305 isl_set
*AGDomain
= getAccessDomain(*AGI
);
3306 while (AGI
!= AG
.end()) {
3307 MemoryAccess
*MA
= *AGI
;
3308 isl_set
*MADomain
= getAccessDomain(MA
);
// Disjoint domain: MA cannot conflict with the rest of this group,
// move it into the new group.
3309 if (isl_set_is_disjoint(AGDomain
, MADomain
)) {
3310 NewAG
.push_back(MA
);
3311 AGI
= AG
.erase(AGI
);
3312 isl_set_free(MADomain
);
3314 AGDomain
= isl_set_union(AGDomain
, MADomain
);
// Only groups with more than one access need a runtime check.
3318 if (NewAG
.size() > 1)
3319 AliasGroups
.push_back(std::move(NewAG
));
3320 isl_set_free(AGDomain
);
// Compute alias groups and build one runtime check per group. Returns
// false when the checks cannot be built (e.g. isl operation quota hit).
// NOTE(review): extraction is missing lines (returns, the handling of
// `Valid`, and closing braces); comments annotate only the visible code.
3324 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3325 // To create sound alias checks we perform the following steps:
3326 // o) We partition each group into read only and non read only accesses.
3327 // o) For each group with more than one base pointer we then compute minimal
3328 // and maximal accesses to each array of a group in read only and non
3329 // read only partitions separately.
3330 AliasGroupVectorTy AliasGroups
;
3331 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3333 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3335 splitAliasGroupsByDomain(AliasGroups
);
3337 for (AliasGroupTy
&AG
: AliasGroups
) {
// Stop early if previous checks already made the runtime context
// infeasible.
3338 if (!hasFeasibleRuntimeContext())
// Guard the (potentially expensive) min/max computation with an isl
// operation quota.
3342 IslMaxOperationsGuard
MaxOpGuard(getIslCtx(), OptComputeOut
);
3343 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3347 if (isl_ctx_last_error(getIslCtx()) == isl_error_quota
) {
3348 invalidate(COMPLEXITY
, DebugLoc());
// Build the runtime alias check for a single alias group: partition the
// accesses into read-only and read-write, bail out for cases where no check
// is needed or none can be generated, and compute the min/max access pairs
// used by the generated check.
// NOTE(review): extraction is missing lines (returns, else keywords, the
// checks of calculateMinMaxAccess results, and closing braces); comments
// annotate only the visible code.
3356 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3357 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3358 AliasGroupTy ReadOnlyAccesses
;
3359 AliasGroupTy ReadWriteAccesses
;
3360 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3361 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3363 auto &F
= getFunction();
// A group with fewer than two accesses cannot alias-conflict.
3365 if (AliasGroup
.size() < 2)
3368 for (MemoryAccess
*Access
: AliasGroup
) {
// Surface a remark so the user can add `restrict` and avoid the check.
3369 emitOptimizationRemarkAnalysis(
3370 F
.getContext(), DEBUG_TYPE
, F
,
3371 Access
->getAccessInstruction()->getDebugLoc(),
3372 "Possibly aliasing pointer, use restrict keyword.");
3374 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3375 if (HasWriteAccess
.count(Array
)) {
3376 ReadWriteArrays
.insert(Array
);
3377 ReadWriteAccesses
.push_back(Access
);
3379 ReadOnlyArrays
.insert(Array
);
3380 ReadOnlyAccesses
.push_back(Access
);
3384 // If there are no read-only pointers, and less than two read-write pointers,
3385 // no alias check is needed.
3386 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3389 // If there is no read-write pointer, no alias check is needed.
3390 if (ReadWriteArrays
.empty())
3393 // For non-affine accesses, no alias check can be generated as we cannot
3394 // compute a sufficiently tight lower and upper bound: bail out.
3395 for (MemoryAccess
*MA
: AliasGroup
) {
3396 if (!MA
->isAffine()) {
3397 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc());
3402 // Ensure that for all memory accesses for which we generate alias checks,
3403 // their base pointers are available.
3404 for (MemoryAccess
*MA
: AliasGroup
) {
3405 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3406 addRequiredInvariantLoad(
3407 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
// Record min/max access pairs for the read-write and read-only partitions.
3410 MinMaxAliasGroups
.emplace_back();
3411 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3412 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3413 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3418 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3423 // Bail out if the number of values we need to compare is too large.
3424 // This is important as the number of comparisons grows quadratically with
3425 // the number of values we need to compare.
3426 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3427 RunTimeChecksMaxArraysPerGroup
)
3431 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3439 /// Get the smallest loop that contains @p S but is not in @p S.
// NOTE(review): extraction is missing lines (the null-check on L before the
// containment loop, the `if (!AllContained) break;` style logic and closing
// braces implied by the comment below); comments annotate only the visible
// code.
3440 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3441 // Start with the smallest loop containing the entry and expand that
3442 // loop until it contains all blocks in the region. If there is a loop
3443 // containing all blocks in the region check if it is itself contained
3444 // and if so take the parent loop as it will be the smallest containing
3445 // the region but not contained by it.
3446 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3448 bool AllContained
= true;
3449 for (auto *BB
: S
.blocks())
3450 AllContained
&= L
->contains(BB
);
3453 L
= L
->getParentLoop();
// If the found loop is itself part of the SCoP, step to its parent.
3456 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
// Scop constructor: initialize bookkeeping members and allocate a fresh isl
// context for this SCoP.
// NOTE(review): extraction is missing part of the initializer list and the
// closing brace; comments annotate only the visible code.
3459 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3460 ScopDetection::DetectionContext
&DC
)
3461 : SE(&ScalarEvolution
), R(R
), name(R
.getNameStr()), IsOptimized(false),
3462 HasSingleExitEdge(R
.getExitingBlock()), HasErrorBlock(false),
3463 MaxLoopDepth(0), CopyStmtsNum(0), DC(DC
),
3464 IslCtx(isl_ctx_alloc(), isl_ctx_free
), Context(nullptr),
3465 Affinator(this, LI
), AssumedContext(nullptr), InvalidContext(nullptr),
// Optionally make isl abort on errors to ease debugging.
3467 if (IslOnErrorAbort
)
3468 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT
);
// For multi-dimensional arrays, detect a constant stride pattern in one
// dimension (via the affine hull of the accessed elements) and fold that
// constant factor into the rightmost (innermost) dimension's size, updating
// all access relations and the array's dimension sizes accordingly.
// NOTE(review): extraction is missing lines (continues, the DimOnly and
// ValInt declarations, `CanFold` checks, and closing braces); comments
// annotate only the visible code.
3472 void Scop::foldSizeConstantsToRight() {
3473 isl_union_set
*Accessed
= isl_union_map_range(getAccesses());
3475 for (auto Array
: arrays()) {
// Only multi-dimensional arrays can have a foldable dimension.
3476 if (Array
->getNumberOfDimensions() <= 1)
3479 isl_space
*Space
= Array
->getSpace();
3481 Space
= isl_space_align_params(Space
, isl_union_set_get_space(Accessed
));
3483 if (!isl_union_set_contains(Accessed
, Space
)) {
3484 isl_space_free(Space
);
3488 isl_set
*Elements
= isl_union_set_extract_set(Accessed
, Space
);
3490 isl_map
*Transform
=
3491 isl_map_universe(isl_space_map_from_set(Array
->getSpace()));
3493 std::vector
<int> Int
;
3495 int Dims
= isl_set_dim(Elements
, isl_dim_set
);
3496 for (int i
= 0; i
< Dims
; i
++) {
// Isolate dimension i of the accessed elements and compute its affine
// hull to detect a constant stride.
3498 isl_set_project_out(isl_set_copy(Elements
), isl_dim_set
, 0, i
);
3499 DimOnly
= isl_set_project_out(DimOnly
, isl_dim_set
, 1, Dims
- i
- 1);
3500 DimOnly
= isl_set_lower_bound_si(DimOnly
, isl_dim_set
, 0, 0);
3502 isl_basic_set
*DimHull
= isl_set_affine_hull(DimOnly
);
// The innermost dimension is kept unchanged.
3504 if (i
== Dims
- 1) {
3506 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3507 isl_basic_set_free(DimHull
);
// A single existentially quantified div in the hull encodes a stride;
// its denominator is the fold factor for this dimension.
3511 if (isl_basic_set_dim(DimHull
, isl_dim_div
) == 1) {
3512 isl_aff
*Diff
= isl_basic_set_get_div(DimHull
, 0);
3513 isl_val
*Val
= isl_aff_get_denominator_val(Diff
);
3518 if (isl_val_is_int(Val
))
3519 ValInt
= isl_val_get_num_si(Val
);
3522 Int
.push_back(ValInt
);
// Encode out[i] * ValInt == in[i] in the transform map.
3524 isl_constraint
*C
= isl_constraint_alloc_equality(
3525 isl_local_space_from_space(isl_map_get_space(Transform
)));
3526 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, ValInt
);
3527 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, -1);
3528 Transform
= isl_map_add_constraint(Transform
, C
);
3529 isl_basic_set_free(DimHull
);
// No div: check whether the dimension is constant zero.
3533 isl_basic_set
*ZeroSet
= isl_basic_set_copy(DimHull
);
3534 ZeroSet
= isl_basic_set_fix_si(ZeroSet
, isl_dim_set
, 0, 0);
3537 if (isl_basic_set_is_equal(ZeroSet
, DimHull
)) {
3541 Int
.push_back(ValInt
);
3542 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3543 isl_basic_set_free(DimHull
);
3544 isl_basic_set_free(ZeroSet
);
// The transform is only sound if it covers all accessed elements.
3547 isl_set
*MappedElements
= isl_map_domain(isl_map_copy(Transform
));
3549 if (!isl_set_is_subset(Elements
, MappedElements
)) {
3550 isl_set_free(Elements
);
3551 isl_set_free(MappedElements
);
3552 isl_map_free(Transform
);
3556 isl_set_free(MappedElements
);
3558 bool CanFold
= true;
// Folding requires all middle dimensions to share the same factor.
3563 unsigned NumDims
= Array
->getNumberOfDimensions();
3564 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3565 if (Int
[0] != Int
[i
] && Int
[i
])
3569 isl_set_free(Elements
);
3570 isl_map_free(Transform
);
// Apply the transform to every access relation of this array.
3574 for (auto &Access
: AccessFunctions
)
3575 if (Access
->getScopArrayInfo() == Array
)
3576 Access
->setAccessRelation(isl_map_apply_range(
3577 Access
->getAccessRelation(), isl_map_copy(Transform
)));
3579 isl_map_free(Transform
);
// Multiply the folded factor into the innermost dimension's size.
3581 std::vector
<const SCEV
*> Sizes
;
3582 for (unsigned i
= 0; i
< NumDims
; i
++) {
3583 auto Size
= Array
->getDimensionSize(i
);
3585 if (i
== NumDims
- 1)
3586 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3587 Sizes
.push_back(Size
);
3590 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3592 isl_set_free(Elements
);
3594 isl_union_set_free(Accessed
);
// For every memory access that carries a Fortran array descriptor, attach
// that descriptor to the corresponding ScopArrayInfo.
// NOTE(review): extraction is missing lines (the `if (!FAD) continue;`
// guard implied by the code below and closing braces); comments annotate
// only the visible code.
3598 void Scop::markFortranArrays() {
3599 for (ScopStmt
&Stmt
: Stmts
) {
3600 for (MemoryAccess
*MemAcc
: Stmt
) {
3601 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3605 // TODO: const_cast-ing to edit
3606 ScopArrayInfo
*SAI
=
3607 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3608 assert(SAI
&& "memory access into a Fortran array does not "
3609 "have an associated ScopArrayInfo");
3610 SAI
->applyAndSetFAD(FAD
);
// Run the final access-cleanup pipeline: normalize dimensionality, fold
// constant size factors, fold access relations, add out-of-bounds
// assumptions, and mark Fortran arrays. Order matters: folding steps run
// before the out-of-bounds assumptions are taken.
3615 void Scop::finalizeAccesses() {
3616 updateAccessDimensionality();
3617 foldSizeConstantsToRight();
3618 foldAccessRelations();
3619 assumeNoOutOfBounds();
3620 markFortranArrays();
// NOTE(review): the function header for this block (presumably the Scop
// destructor, `Scop::~Scop()`) is missing from this extraction — confirm
// against the full file. The visible body releases all isl objects owned by
// the Scop before the isl context itself is destroyed.
3624 isl_set_free(Context
);
3625 isl_set_free(AssumedContext
);
3626 isl_set_free(InvalidContext
);
3627 isl_schedule_free(Schedule
);
3629 for (auto &It
: ParameterIds
)
3630 isl_id_free(It
.second
);
3632 for (auto It
: DomainMap
)
3633 isl_set_free(It
.second
);
3635 for (auto &AS
: RecordedAssumptions
)
3636 isl_set_free(AS
.Set
);
3638 // Free the alias groups
3639 for (MinMaxVectorPairTy
&MinMaxAccessPair
: MinMaxAliasGroups
) {
3640 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.first
) {
3641 isl_pw_multi_aff_free(MMA
.first
);
3642 isl_pw_multi_aff_free(MMA
.second
);
3644 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.second
) {
3645 isl_pw_multi_aff_free(MMA
.first
);
3646 isl_pw_multi_aff_free(MMA
.second
);
3650 for (const auto &IAClass
: InvariantEquivClasses
)
3651 isl_set_free(IAClass
.ExecutionContext
);
3653 // Explicitly release all Scop objects and the underlying isl objects before
3654 // we release the isl context.
3656 ScopArrayInfoSet
.clear();
3657 ScopArrayInfoMap
.clear();
3658 ScopArrayNameMap
.clear();
3659 AccessFunctions
.clear();
// For single-dimensional arrays, shrink the element size to the largest
// power-of-two byte size that divides every subscript, update the array's
// element type accordingly, then refresh the dimensionality of all
// accesses.
// NOTE(review): extraction is missing lines (continues and the loop body
// that halves DivisibleSize); comments annotate only the visible code.
3662 void Scop::updateAccessDimensionality() {
3663 // Check all array accesses for each base pointer and find a (virtual) element
3664 // size for the base pointer that divides all access functions.
3665 for (ScopStmt
&Stmt
: *this)
3666 for (MemoryAccess
*Access
: Stmt
) {
3667 if (!Access
->isArrayKind())
3669 ScopArrayInfo
*Array
=
3670 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3672 if (Array
->getNumberOfDimensions() != 1)
3674 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3675 const SCEV
*Subscript
= Access
->getSubscript(0);
3676 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3678 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3679 Array
->updateElementType(Ty
);
// Refresh all accesses after element types may have changed.
3682 for (auto &Stmt
: *this)
3683 for (auto &Access
: Stmt
)
3684 Access
->updateDimensionality();
// Fold the access relation of every memory access in the SCoP.
3687 void Scop::foldAccessRelations() {
3688 for (auto &Stmt
: *this)
3689 for (auto &Access
: Stmt
)
3690 Access
->foldAccessRelation();
// Record the no-out-of-bounds assumption for every memory access in the
// SCoP.
3693 void Scop::assumeNoOutOfBounds() {
3694 for (auto &Stmt
: *this)
3695 for (auto &Access
: Stmt
)
3696 Access
->assumeNoOutOfBound();
// Remove statements that are trivially unnecessary: empty statements,
// statements without a (feasible) domain, and — after invariant load
// hoisting — statements that only read.
// NOTE(review): extraction is missing lines (the guard around the DomainMap
// lookup, the loop body classifying accesses as read-only, the `continue`
// when the statement is kept, and closing braces); comments annotate only
// the visible code.
3699 void Scop::simplifySCoP(bool AfterHoisting
) {
3700 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3701 ScopStmt
&Stmt
= *StmtIt
;
3703 bool RemoveStmt
= Stmt
.isEmpty();
3705 RemoveStmt
= !DomainMap
[Stmt
.getEntryBlock()];
3707 // Remove read only statements only after invariant loop hoisting.
3708 if (!RemoveStmt
&& AfterHoisting
) {
3709 bool OnlyRead
= true;
3710 for (MemoryAccess
*MA
: Stmt
) {
3718 RemoveStmt
= OnlyRead
;
3726 // Remove the statement because it is unnecessary.
3727 if (Stmt
.isRegionStmt())
3728 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks())
3731 StmtMap
.erase(Stmt
.getBasicBlock());
3733 StmtIt
= Stmts
.erase(StmtIt
);
// Find the invariant-load equivalence class that contains @p Val (a load
// instruction), matching on the pointer SCEV and access type, and return it
// if one of its accesses is @p Val itself.
// NOTE(review): extraction is missing lines (early returns, the `continue`
// after the class mismatch, the `return &IAClass;` and final `return
// nullptr;`); comments annotate only the visible code.
3737 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3738 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
// If the load was mapped to a representative load, look up the class of
// the representative.
3742 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3743 LInst
= cast
<LoadInst
>(Rep
);
3745 Type
*Ty
= LInst
->getType();
3746 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3747 for (auto &IAClass
: InvariantEquivClasses
) {
// Classes are identified by (pointer SCEV, access type) pairs.
3748 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3751 auto &MAs
= IAClass
.InvariantAccesses
;
3752 for (auto *MA
: MAs
)
3753 if (MA
->getAccessInstruction() == Val
)
3760 /// Check if @p MA can always be hoisted without execution context.
// NOTE(review): extraction is missing the `return false;`/`return true;`
// statements between the conditions below; comments annotate only the
// visible code.
3761 static bool canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3762 bool MAInvalidCtxIsEmpty
,
3763 bool NonHoistableCtxIsEmpty
) {
3764 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3765 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3766 // TODO: We can provide more information for better but more expensive
// The pointer must be provably dereferenceable and aligned to hoist
// unconditionally.
3768 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3769 LInst
->getAlignment(), DL
))
3772 // If the location might be overwritten we do not hoist it unconditionally.
3774 // TODO: This is probably to conservative.
3775 if (!NonHoistableCtxIsEmpty
)
3778 // If a dereferenceable load is in a statement that is modeled precisely we
3780 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3783 // Even if the statement is not modeled precisely we can hoist the load if it
3784 // does not involve any parameters that might have been specialized by the
3785 // statement domain.
3786 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3787 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
3792 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3797 auto *StmtInvalidCtx
= Stmt
.getInvalidContext();
3798 bool StmtInvalidCtxIsEmpty
= isl_set_is_empty(StmtInvalidCtx
);
3800 // Get the context under which the statement is executed but remove the error
3801 // context under which this statement is reached.
3802 isl_set
*DomainCtx
= isl_set_params(Stmt
.getDomain());
3803 DomainCtx
= isl_set_subtract(DomainCtx
, StmtInvalidCtx
);
3805 if (isl_set_n_basic_set(DomainCtx
) >= MaxDisjunctsInDomain
) {
3806 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3807 invalidate(COMPLEXITY
, AccInst
->getDebugLoc());
3808 isl_set_free(DomainCtx
);
3809 for (auto &InvMA
: InvMAs
)
3810 isl_set_free(InvMA
.NonHoistableCtx
);
3814 // Project out all parameters that relate to loads in the statement. Otherwise
3815 // we could have cyclic dependences on the constraints under which the
3816 // hoisted loads are executed and we could not determine an order in which to
3817 // pre-load them. This happens because not only lower bounds are part of the
3818 // domain but also upper bounds.
3819 for (auto &InvMA
: InvMAs
) {
3820 auto *MA
= InvMA
.MA
;
3821 Instruction
*AccInst
= MA
->getAccessInstruction();
3822 if (SE
->isSCEVable(AccInst
->getType())) {
3823 SetVector
<Value
*> Values
;
3824 for (const SCEV
*Parameter
: Parameters
) {
3826 findValues(Parameter
, *SE
, Values
);
3827 if (!Values
.count(AccInst
))
3830 if (isl_id
*ParamId
= getIdForParam(Parameter
)) {
3831 int Dim
= isl_set_find_dim_by_id(DomainCtx
, isl_dim_param
, ParamId
);
3833 DomainCtx
= isl_set_eliminate(DomainCtx
, isl_dim_param
, Dim
, 1);
3834 isl_id_free(ParamId
);
3840 for (auto &InvMA
: InvMAs
) {
3841 auto *MA
= InvMA
.MA
;
3842 auto *NHCtx
= InvMA
.NonHoistableCtx
;
3844 // Check for another invariant access that accesses the same location as
3845 // MA and if found consolidate them. Otherwise create a new equivalence
3846 // class at the end of InvariantEquivClasses.
3847 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3848 Type
*Ty
= LInst
->getType();
3849 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3851 auto *MAInvalidCtx
= MA
->getInvalidContext();
3852 bool NonHoistableCtxIsEmpty
= isl_set_is_empty(NHCtx
);
3853 bool MAInvalidCtxIsEmpty
= isl_set_is_empty(MAInvalidCtx
);
3856 // Check if we know that this pointer can be speculatively accessed.
3857 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3858 NonHoistableCtxIsEmpty
)) {
3859 MACtx
= isl_set_universe(isl_set_get_space(DomainCtx
));
3860 isl_set_free(MAInvalidCtx
);
3861 isl_set_free(NHCtx
);
3863 MACtx
= isl_set_copy(DomainCtx
);
3864 MACtx
= isl_set_subtract(MACtx
, isl_set_union(MAInvalidCtx
, NHCtx
));
3865 MACtx
= isl_set_gist_params(MACtx
, getContext());
3868 bool Consolidated
= false;
3869 for (auto &IAClass
: InvariantEquivClasses
) {
3870 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3873 // If the pointer and the type is equal check if the access function wrt.
3874 // to the domain is equal too. It can happen that the domain fixes
3875 // parameter values and these can be different for distinct part of the
3876 // SCoP. If this happens we cannot consolidate the loads but need to
3877 // create a new invariant load equivalence class.
3878 auto &MAs
= IAClass
.InvariantAccesses
;
3880 auto *LastMA
= MAs
.front();
3882 auto *AR
= isl_map_range(MA
->getAccessRelation());
3883 auto *LastAR
= isl_map_range(LastMA
->getAccessRelation());
3884 bool SameAR
= isl_set_is_equal(AR
, LastAR
);
3886 isl_set_free(LastAR
);
3892 // Add MA to the list of accesses that are in this class.
3895 Consolidated
= true;
3897 // Unify the execution context of the class and this statement.
3898 isl_set
*&IAClassDomainCtx
= IAClass
.ExecutionContext
;
3899 if (IAClassDomainCtx
)
3901 isl_set_coalesce(isl_set_union(IAClassDomainCtx
, MACtx
));
3903 IAClassDomainCtx
= MACtx
;
3910 // If we did not consolidate MA, thus did not find an equivalence class
3911 // for it, we create a new one.
3912 InvariantEquivClasses
.emplace_back(
3913 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList
{MA
}, MACtx
, Ty
});
3916 isl_set_free(DomainCtx
);
3919 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
3920 // TODO: Loads that are not loop carried, hence are in a statement with
3921 // zero iterators, are by construction invariant, though we
3922 // currently "hoist" them anyway. This is necessary because we allow
3923 // them to be treated as parameters (e.g., in conditions) and our code
3924 // generation would otherwise use the old value.
3926 auto &Stmt
= *Access
->getStatement();
3927 BasicBlock
*BB
= Stmt
.getEntryBlock();
3929 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
3930 Access
->isMemoryIntrinsic())
3933 // Skip accesses that have an invariant base pointer which is defined but
3934 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3935 // returns a pointer that is used as a base address. However, as we want
3936 // to hoist indirect pointers, we allow the base pointer to be defined in
3937 // the region if it is also a memory access. Each ScopArrayInfo object
3938 // that has a base pointer origin has a base pointer that is loaded and
3939 // that it is invariant, thus it will be hoisted too. However, if there is
3940 // no base pointer origin we check that the base pointer is defined
3941 // outside the region.
3942 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
3943 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
3946 isl::map AccessRelation
= give(Access
->getAccessRelation());
3947 assert(!AccessRelation
.is_empty());
3949 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
3952 AccessRelation
= AccessRelation
.intersect_domain(give(Stmt
.getDomain()));
3953 isl::set SafeToLoad
;
3955 auto &DL
= getFunction().getParent()->getDataLayout();
3956 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
3958 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
3959 } else if (BB
!= LI
->getParent()) {
3960 // Skip accesses in non-affine subregions as they might not be executed
3961 // under the same condition as the entry of the non-affine subregion.
3964 SafeToLoad
= AccessRelation
.range();
3967 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
3968 isl::set WrittenCtx
= Written
.params();
3969 bool IsWritten
= !WrittenCtx
.is_empty();
3974 WrittenCtx
= WrittenCtx
.remove_divs();
3976 isl_set_n_basic_set(WrittenCtx
.get()) >= MaxDisjunctsInDomain
;
3977 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
3980 addAssumption(INVARIANTLOAD
, WrittenCtx
.copy(), LI
->getDebugLoc(),
3985 void Scop::verifyInvariantLoads() {
3986 auto &RIL
= getRequiredInvariantLoads();
3987 for (LoadInst
*LI
: RIL
) {
3988 assert(LI
&& contains(LI
));
3989 ScopStmt
*Stmt
= getStmtFor(LI
);
3990 if (Stmt
&& Stmt
->getArrayAccessOrNULLFor(LI
)) {
3991 invalidate(INVARIANTLOAD
, LI
->getDebugLoc());
3997 void Scop::hoistInvariantLoads() {
3998 if (!PollyInvariantLoadHoisting
)
4001 isl::union_map Writes
= give(getWrites());
4002 for (ScopStmt
&Stmt
: *this) {
4003 InvariantAccessesTy InvariantAccesses
;
4005 for (MemoryAccess
*Access
: Stmt
)
4006 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
4007 InvariantAccesses
.push_back({Access
, NHCtx
.release()});
4009 // Transfer the memory access from the statement to the SCoP.
4010 for (auto InvMA
: InvariantAccesses
)
4011 Stmt
.removeMemoryAccess(InvMA
.MA
);
4012 addInvariantLoads(Stmt
, InvariantAccesses
);
4016 /// Find the canonical scop array info object for a set of invariant load
4017 /// hoisted loads. The canonical array is the one that corresponds to the
4018 /// first load in the list of accesses which is used as base pointer of a
4020 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
4021 MemoryAccessList
&Accesses
) {
4022 for (MemoryAccess
*Access
: Accesses
) {
4023 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
4024 Access
->getAccessInstruction(), MemoryKind::Array
);
4026 return CanonicalArray
;
4031 /// Check if @p Array severs as base array in an invariant load.
4032 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
4033 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
4034 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
4035 if (Access2
->getScopArrayInfo() == Array
)
4040 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4041 /// with a reference to @p New.
4042 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
4043 const ScopArrayInfo
*New
) {
4044 for (ScopStmt
&Stmt
: *S
)
4045 for (MemoryAccess
*Access
: Stmt
) {
4046 if (Access
->getLatestScopArrayInfo() != Old
)
4049 isl_id
*Id
= New
->getBasePtrId();
4050 isl_map
*Map
= Access
->getAccessRelation();
4051 Map
= isl_map_set_tuple_id(Map
, isl_dim_out
, Id
);
4052 Access
->setAccessRelation(Map
);
4056 void Scop::canonicalizeDynamicBasePtrs() {
4057 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
4058 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
4060 const ScopArrayInfo
*CanonicalBasePtrSAI
=
4061 findCanonicalArray(this, BasePtrAccesses
);
4063 if (!CanonicalBasePtrSAI
)
4066 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
4067 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
4068 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
4069 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
4070 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
4073 // we currently do not canonicalize arrays where some accesses are
4074 // hoisted as invariant loads. If we would, we need to update the access
4075 // function of the invariant loads as well. However, as this is not a
4076 // very common situation, we leave this for now to avoid further
4077 // complexity increases.
4078 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
4081 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
4086 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
4087 ArrayRef
<const SCEV
*> Sizes
,
4089 const char *BaseName
) {
4090 assert((BasePtr
|| BaseName
) &&
4091 "BasePtr and BaseName can not be nullptr at the same time.");
4092 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
4093 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
4094 : ScopArrayNameMap
[BaseName
];
4096 auto &DL
= getFunction().getParent()->getDataLayout();
4097 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
4098 DL
, this, BaseName
));
4099 ScopArrayInfoSet
.insert(SAI
.get());
4101 SAI
->updateElementType(ElementType
);
4102 // In case of mismatching array sizes, we bail out by setting the run-time
4103 // context to false.
4104 if (!SAI
->updateSizes(Sizes
))
4105 invalidate(DELINEARIZATION
, DebugLoc());
4110 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
4111 const std::string
&BaseName
,
4112 const std::vector
<unsigned> &Sizes
) {
4113 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
4114 std::vector
<const SCEV
*> SCEVSizes
;
4116 for (auto size
: Sizes
)
4118 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
4120 SCEVSizes
.push_back(nullptr);
4122 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
4123 MemoryKind::Array
, BaseName
.c_str());
4127 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
4129 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
4133 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
4134 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
4135 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
4139 std::string
Scop::getContextStr() const { return stringFromIslObj(Context
); }
4141 std::string
Scop::getAssumedContextStr() const {
4142 assert(AssumedContext
&& "Assumed context not yet built");
4143 return stringFromIslObj(AssumedContext
);
4146 std::string
Scop::getInvalidContextStr() const {
4147 return stringFromIslObj(InvalidContext
);
4150 std::string
Scop::getNameStr() const {
4151 std::string ExitName
, EntryName
;
4152 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4153 return EntryName
+ "---" + ExitName
;
4156 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4157 std::string ExitName
, EntryName
;
4158 raw_string_ostream
ExitStr(ExitName
);
4159 raw_string_ostream
EntryStr(EntryName
);
4161 R
.getEntry()->printAsOperand(EntryStr
, false);
4165 R
.getExit()->printAsOperand(ExitStr
, false);
4168 ExitName
= "FunctionExit";
4170 return std::make_pair(EntryName
, ExitName
);
4173 __isl_give isl_set
*Scop::getContext() const { return isl_set_copy(Context
); }
4174 __isl_give isl_space
*Scop::getParamSpace() const {
4175 return isl_set_get_space(Context
);
4178 __isl_give isl_set
*Scop::getAssumedContext() const {
4179 assert(AssumedContext
&& "Assumed context not yet built");
4180 return isl_set_copy(AssumedContext
);
4183 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4184 if (PollyProcessUnprofitable
)
4190 unsigned OptimizableStmtsOrLoops
= 0;
4191 for (auto &Stmt
: *this) {
4192 if (Stmt
.getNumIterators() == 0)
4195 bool ContainsArrayAccs
= false;
4196 bool ContainsScalarAccs
= false;
4197 for (auto *MA
: Stmt
) {
4200 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4201 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4204 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4205 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4208 return OptimizableStmtsOrLoops
> 1;
4211 bool Scop::hasFeasibleRuntimeContext() const {
4212 auto *PositiveContext
= getAssumedContext();
4213 auto *NegativeContext
= getInvalidContext();
4214 PositiveContext
= addNonEmptyDomainConstraints(PositiveContext
);
4215 bool IsFeasible
= !(isl_set_is_empty(PositiveContext
) ||
4216 isl_set_is_subset(PositiveContext
, NegativeContext
));
4217 isl_set_free(PositiveContext
);
4219 isl_set_free(NegativeContext
);
4223 auto *DomainContext
= isl_union_set_params(getDomains());
4224 IsFeasible
= !isl_set_is_subset(DomainContext
, NegativeContext
);
4225 IsFeasible
&= !isl_set_is_subset(Context
, NegativeContext
);
4226 isl_set_free(NegativeContext
);
4227 isl_set_free(DomainContext
);
4232 static std::string
toString(AssumptionKind Kind
) {
4235 return "No-aliasing";
4239 return "No-overflows";
4241 return "Signed-unsigned";
4243 return "Low complexity";
4245 return "Profitable";
4249 return "Finite loop";
4251 return "Invariant load";
4252 case DELINEARIZATION
:
4253 return "Delinearization";
4255 llvm_unreachable("Unknown AssumptionKind!");
4258 bool Scop::isEffectiveAssumption(__isl_keep isl_set
*Set
, AssumptionSign Sign
) {
4259 if (Sign
== AS_ASSUMPTION
) {
4260 if (isl_set_is_subset(Context
, Set
))
4263 if (isl_set_is_subset(AssumedContext
, Set
))
4266 if (isl_set_is_disjoint(Set
, Context
))
4269 if (isl_set_is_subset(Set
, InvalidContext
))
4275 bool Scop::trackAssumption(AssumptionKind Kind
, __isl_keep isl_set
*Set
,
4276 DebugLoc Loc
, AssumptionSign Sign
) {
4277 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4280 // Do never emit trivial assumptions as they only clutter the output.
4281 if (!PollyRemarksMinimal
) {
4282 isl_set
*Univ
= nullptr;
4283 if (Sign
== AS_ASSUMPTION
)
4284 Univ
= isl_set_universe(isl_set_get_space(Set
));
4286 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& isl_set_is_empty(Set
)) ||
4287 (Sign
== AS_ASSUMPTION
&& isl_set_is_equal(Univ
, Set
));
4296 AssumptionsAliasing
++;
4299 AssumptionsInbounds
++;
4302 AssumptionsWrapping
++;
4305 AssumptionsUnsigned
++;
4308 AssumptionsComplexity
++;
4311 AssumptionsUnprofitable
++;
4314 AssumptionsErrorBlock
++;
4317 AssumptionsInfiniteLoop
++;
4320 AssumptionsInvariantLoad
++;
4322 case DELINEARIZATION
:
4323 AssumptionsDelinearization
++;
4327 auto &F
= getFunction();
4328 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4329 std::string Msg
= toString(Kind
) + Suffix
+ stringFromIslObj(Set
);
4330 emitOptimizationRemarkAnalysis(F
.getContext(), DEBUG_TYPE
, F
, Loc
, Msg
);
4334 void Scop::addAssumption(AssumptionKind Kind
, __isl_take isl_set
*Set
,
4335 DebugLoc Loc
, AssumptionSign Sign
) {
4336 // Simplify the assumptions/restrictions first.
4337 Set
= isl_set_gist_params(Set
, getContext());
4339 if (!trackAssumption(Kind
, Set
, Loc
, Sign
)) {
4344 if (Sign
== AS_ASSUMPTION
) {
4345 AssumedContext
= isl_set_intersect(AssumedContext
, Set
);
4346 AssumedContext
= isl_set_coalesce(AssumedContext
);
4348 InvalidContext
= isl_set_union(InvalidContext
, Set
);
4349 InvalidContext
= isl_set_coalesce(InvalidContext
);
4353 void Scop::recordAssumption(AssumptionKind Kind
, __isl_take isl_set
*Set
,
4354 DebugLoc Loc
, AssumptionSign Sign
, BasicBlock
*BB
) {
4355 assert((isl_set_is_params(Set
) || BB
) &&
4356 "Assumptions without a basic block must be parameter sets");
4357 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4360 void Scop::addRecordedAssumptions() {
4361 while (!RecordedAssumptions
.empty()) {
4362 const Assumption
&AS
= RecordedAssumptions
.pop_back_val();
4365 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
);
4369 // If the domain was deleted the assumptions are void.
4370 isl_set
*Dom
= getDomainConditions(AS
.BB
);
4372 isl_set_free(AS
.Set
);
4376 // If a basic block was given use its domain to simplify the assumption.
4377 // In case of restrictions we know they only have to hold on the domain,
4378 // thus we can intersect them with the domain of the block. However, for
4379 // assumptions the domain has to imply them, thus:
4381 // Dom => S <==> A v B <==> A - B
4383 // To avoid the complement we will register A - B as a restriction not an
4385 isl_set
*S
= AS
.Set
;
4386 if (AS
.Sign
== AS_RESTRICTION
)
4387 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4388 else /* (AS.Sign == AS_ASSUMPTION) */
4389 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4391 addAssumption(AS
.Kind
, S
, AS
.Loc
, AS_RESTRICTION
);
4395 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
) {
4396 addAssumption(Kind
, isl_set_empty(getParamSpace()), Loc
, AS_ASSUMPTION
);
4399 __isl_give isl_set
*Scop::getInvalidContext() const {
4400 return isl_set_copy(InvalidContext
);
4403 void Scop::printContext(raw_ostream
&OS
) const {
4405 OS
.indent(4) << Context
<< "\n";
4407 OS
.indent(4) << "Assumed Context:\n";
4408 OS
.indent(4) << AssumedContext
<< "\n";
4410 OS
.indent(4) << "Invalid Context:\n";
4411 OS
.indent(4) << InvalidContext
<< "\n";
4414 for (const SCEV
*Parameter
: Parameters
)
4415 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4418 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4420 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4421 if (Pair
.second
.size() == 0)
4424 noOfGroups
+= Pair
.second
.size();
4427 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4428 if (MinMaxAliasGroups
.empty()) {
4429 OS
.indent(8) << "n/a\n";
4433 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4435 // If the group has no read only accesses print the write accesses.
4436 if (Pair
.second
.empty()) {
4437 OS
.indent(8) << "[[";
4438 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4439 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4445 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4446 OS
.indent(8) << "[[";
4447 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4448 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4449 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4457 void Scop::printStatements(raw_ostream
&OS
) const {
4458 OS
<< "Statements {\n";
4460 for (const ScopStmt
&Stmt
: *this)
4461 OS
.indent(4) << Stmt
;
4463 OS
.indent(4) << "}\n";
4466 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4469 for (auto &Array
: arrays())
4472 OS
.indent(4) << "}\n";
4474 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4476 for (auto &Array
: arrays())
4477 Array
->print(OS
, /* SizeAsPwAff */ true);
4479 OS
.indent(4) << "}\n";
4482 void Scop::print(raw_ostream
&OS
) const {
4483 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4484 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4485 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4486 OS
.indent(4) << "Invariant Accesses: {\n";
4487 for (const auto &IAClass
: InvariantEquivClasses
) {
4488 const auto &MAs
= IAClass
.InvariantAccesses
;
4490 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4492 MAs
.front()->print(OS
);
4493 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4497 OS
.indent(4) << "}\n";
4498 printContext(OS
.indent(4));
4499 printArrayInfo(OS
.indent(4));
4500 printAliasAssumptions(OS
);
4501 printStatements(OS
.indent(4));
4504 void Scop::dump() const { print(dbgs()); }
4506 isl_ctx
*Scop::getIslCtx() const { return IslCtx
.get(); }
4508 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4510 // First try to use the SCEVAffinator to generate a piecewise defined
4511 // affine function from @p E in the context of @p BB. If that tasks becomes to
4512 // complex the affinator might return a nullptr. In such a case we invalidate
4513 // the SCoP and return a dummy value. This way we do not need to add error
4514 // handling code to all users of this function.
4515 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4517 // TODO: We could use a heuristic and either use:
4518 // SCEVAffinator::takeNonNegativeAssumption
4520 // SCEVAffinator::interpretAsUnsigned
4521 // to deal with unsigned or "NonNegative" SCEVs.
4523 Affinator
.takeNonNegativeAssumption(PWAC
);
4527 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4528 invalidate(COMPLEXITY
, DL
);
4529 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4532 __isl_give isl_union_set
*Scop::getDomains() const {
4533 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx(), 0);
4534 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4536 for (const ScopStmt
&Stmt
: *this)
4537 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain());
4542 __isl_give isl_pw_aff
*Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4543 PWACtx PWAC
= getPwAff(E
, BB
);
4544 isl_set_free(PWAC
.second
);
4548 __isl_give isl_union_map
*
4549 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4550 isl_union_map
*Accesses
= isl_union_map_empty(getParamSpace());
4552 for (ScopStmt
&Stmt
: *this) {
4553 for (MemoryAccess
*MA
: Stmt
) {
4554 if (!Predicate(*MA
))
4557 isl_set
*Domain
= Stmt
.getDomain();
4558 isl_map
*AccessDomain
= MA
->getAccessRelation();
4559 AccessDomain
= isl_map_intersect_domain(AccessDomain
, Domain
);
4560 Accesses
= isl_union_map_add_map(Accesses
, AccessDomain
);
4563 return isl_union_map_coalesce(Accesses
);
4566 __isl_give isl_union_map
*Scop::getMustWrites() {
4567 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4570 __isl_give isl_union_map
*Scop::getMayWrites() {
4571 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4574 __isl_give isl_union_map
*Scop::getWrites() {
4575 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4578 __isl_give isl_union_map
*Scop::getReads() {
4579 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4582 __isl_give isl_union_map
*Scop::getAccesses() {
4583 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4586 // Check whether @p Node is an extension node.
4588 // @return true if @p Node is an extension node.
4589 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4590 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4591 return isl_bool_error
;
4593 return isl_bool_true
;
4596 bool Scop::containsExtensionNode(__isl_keep isl_schedule
*Schedule
) {
4597 return isl_schedule_foreach_schedule_node_top_down(Schedule
, isNotExtNode
,
4598 nullptr) == isl_stat_error
;
4601 __isl_give isl_union_map
*Scop::getSchedule() const {
4602 auto *Tree
= getScheduleTree();
4603 if (containsExtensionNode(Tree
)) {
4604 isl_schedule_free(Tree
);
4607 auto *S
= isl_schedule_get_map(Tree
);
4608 isl_schedule_free(Tree
);
4612 __isl_give isl_schedule
*Scop::getScheduleTree() const {
4613 return isl_schedule_intersect_domain(isl_schedule_copy(Schedule
),
4617 void Scop::setSchedule(__isl_take isl_union_map
*NewSchedule
) {
4618 auto *S
= isl_schedule_from_domain(getDomains());
4619 S
= isl_schedule_insert_partial_schedule(
4620 S
, isl_multi_union_pw_aff_from_union_map(NewSchedule
));
4621 isl_schedule_free(Schedule
);
4625 void Scop::setScheduleTree(__isl_take isl_schedule
*NewSchedule
) {
4626 isl_schedule_free(Schedule
);
4627 Schedule
= NewSchedule
;
4630 bool Scop::restrictDomains(__isl_take isl_union_set
*Domain
) {
4631 bool Changed
= false;
4632 for (ScopStmt
&Stmt
: *this) {
4633 isl_union_set
*StmtDomain
= isl_union_set_from_set(Stmt
.getDomain());
4634 isl_union_set
*NewStmtDomain
= isl_union_set_intersect(
4635 isl_union_set_copy(StmtDomain
), isl_union_set_copy(Domain
));
4637 if (isl_union_set_is_subset(StmtDomain
, NewStmtDomain
)) {
4638 isl_union_set_free(StmtDomain
);
4639 isl_union_set_free(NewStmtDomain
);
4645 isl_union_set_free(StmtDomain
);
4646 NewStmtDomain
= isl_union_set_coalesce(NewStmtDomain
);
4648 if (isl_union_set_is_empty(NewStmtDomain
)) {
4649 Stmt
.restrictDomain(isl_set_empty(Stmt
.getDomainSpace()));
4650 isl_union_set_free(NewStmtDomain
);
4652 Stmt
.restrictDomain(isl_set_from_union_set(NewStmtDomain
));
4654 isl_union_set_free(Domain
);
4658 ScalarEvolution
*Scop::getSE() const { return SE
; }
4660 // Create an isl_multi_union_aff that defines an identity mapping from the
4661 // elements of USet to their N-th dimension.
4665 // Domain: { A[i,j]; B[i,j,k] }
4668 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4670 // @param USet A union set describing the elements for which to generate a
4672 // @param N The dimension to map to.
4673 // @returns A mapping from USet to its N-th dimension.
4674 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4677 assert(!USet
.is_empty());
4679 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4681 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4682 int Dim
= S
.dim(isl::dim::set
);
4683 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4686 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4688 Result
= Result
.add_pw_multi_aff(PMA
);
4689 return isl::stat::ok
;
4692 isl::stat Res
= USet
.foreach_set(Lambda
);
4695 assert(Res
== isl::stat::ok
);
4697 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4700 void Scop::addScopStmt(BasicBlock
*BB
, Loop
*SurroundingLoop
,
4701 std::vector
<Instruction
*> Instructions
) {
4702 assert(BB
&& "Unexpected nullptr!");
4703 Stmts
.emplace_back(*this, *BB
, SurroundingLoop
, Instructions
);
4704 auto *Stmt
= &Stmts
.back();
4708 void Scop::addScopStmt(Region
*R
, Loop
*SurroundingLoop
) {
4709 assert(R
&& "Unexpected nullptr!");
4710 Stmts
.emplace_back(*this, *R
, SurroundingLoop
);
4711 auto *Stmt
= &Stmts
.back();
4712 for (BasicBlock
*BB
: R
->blocks())
4716 ScopStmt
*Scop::addScopStmt(__isl_take isl_map
*SourceRel
,
4717 __isl_take isl_map
*TargetRel
,
4718 __isl_take isl_set
*Domain
) {
4720 isl_set
*SourceDomain
= isl_map_domain(isl_map_copy(SourceRel
));
4721 isl_set
*TargetDomain
= isl_map_domain(isl_map_copy(TargetRel
));
4722 assert(isl_set_is_subset(Domain
, TargetDomain
) &&
4723 "Target access not defined for complete statement domain");
4724 assert(isl_set_is_subset(Domain
, SourceDomain
) &&
4725 "Source access not defined for complete statement domain");
4726 isl_set_free(SourceDomain
);
4727 isl_set_free(TargetDomain
);
4729 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4731 return &(Stmts
.back());
4734 void Scop::buildSchedule(LoopInfo
&LI
) {
4735 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4736 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4737 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4738 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4739 Schedule
= LoopStack
[0].Schedule
;
4742 /// To generate a schedule for the elements in a Region we traverse the Region
4743 /// in reverse-post-order and add the contained RegionNodes in traversal order
4744 /// to the schedule of the loop that is currently at the top of the LoopStack.
4745 /// For loop-free codes, this results in a correct sequential ordering.
4756 /// Including loops requires additional processing. Whenever a loop header is
4757 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4758 /// from an empty schedule, we first process all RegionNodes that are within
4759 /// this loop and complete the sequential schedule at this loop-level before
4760 /// processing about any other nodes. To implement this
4761 /// loop-nodes-first-processing, the reverse post-order traversal is
4762 /// insufficient. Hence, we additionally check if the traversal yields
4763 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4764 /// These region-nodes are then queue and only traverse after the all nodes
4765 /// within the current loop have been processed.
4766 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4767 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4769 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4770 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4771 std::deque
<RegionNode
*> DelayList
;
4772 bool LastRNWaiting
= false;
4774 // Iterate over the region @p R in reverse post-order but queue
4775 // sub-regions/blocks iff they are not part of the last encountered but not
4776 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4777 // that we queued the last sub-region/block from the reverse post-order
4778 // iterator. If it is set we have to explore the next sub-region/block from
4779 // the iterator (if any) to guarantee progress. If it is not set we first try
4780 // the next queued sub-region/blocks.
4781 while (!WorkList
.empty() || !DelayList
.empty()) {
4784 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.size() == 0) {
4785 RN
= WorkList
.front();
4786 WorkList
.pop_front();
4787 LastRNWaiting
= false;
4789 RN
= DelayList
.front();
4790 DelayList
.pop_front();
4793 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4797 Loop
*LastLoop
= LoopStack
.back().L
;
4798 if (LastLoop
!= L
) {
4799 if (LastLoop
&& !LastLoop
->contains(L
)) {
4800 LastRNWaiting
= true;
4801 DelayList
.push_back(RN
);
4804 LoopStack
.push_back({L
, nullptr, 0});
4806 buildSchedule(RN
, LoopStack
, LI
);
4812 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4814 if (RN
->isSubRegion()) {
4815 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
4816 if (!isNonAffineSubRegion(LocalRegion
)) {
4817 buildSchedule(LocalRegion
, LoopStack
, LI
);
4822 auto &LoopData
= LoopStack
.back();
4823 LoopData
.NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
4825 if (auto *Stmt
= getStmtFor(RN
)) {
4826 auto *UDomain
= isl_union_set_from_set(Stmt
->getDomain());
4827 auto *StmtSchedule
= isl_schedule_from_domain(UDomain
);
4828 LoopData
.Schedule
= combineInSequence(LoopData
.Schedule
, StmtSchedule
);
4831 // Check if we just processed the last node in this loop. If we did, finalize
4834 // - adding new schedule dimensions
4835 // - folding the resulting schedule into the parent loop schedule
4836 // - dropping the loop schedule from the LoopStack.
4838 // Then continue to check surrounding loops, which might also have been
4839 // completed by this node.
4840 while (LoopData
.L
&&
4841 LoopData
.NumBlocksProcessed
== getNumBlocksInLoop(LoopData
.L
)) {
4842 auto *Schedule
= LoopData
.Schedule
;
4843 auto NumBlocksProcessed
= LoopData
.NumBlocksProcessed
;
4845 LoopStack
.pop_back();
4846 auto &NextLoopData
= LoopStack
.back();
4849 isl::union_set Domain
= give(isl_schedule_get_domain(Schedule
));
4850 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, LoopStack
.size());
4851 Schedule
= isl_schedule_insert_partial_schedule(Schedule
, MUPA
.release());
4852 NextLoopData
.Schedule
=
4853 combineInSequence(NextLoopData
.Schedule
, Schedule
);
4856 NextLoopData
.NumBlocksProcessed
+= NumBlocksProcessed
;
4857 LoopData
= NextLoopData
;
4861 ScopStmt
*Scop::getStmtFor(BasicBlock
*BB
) const {
4862 auto StmtMapIt
= StmtMap
.find(BB
);
4863 if (StmtMapIt
== StmtMap
.end())
4865 return StmtMapIt
->second
;
4868 ScopStmt
*Scop::getStmtFor(RegionNode
*RN
) const {
4869 if (RN
->isSubRegion())
4870 return getStmtFor(RN
->getNodeAs
<Region
>());
4871 return getStmtFor(RN
->getNodeAs
<BasicBlock
>());
4874 ScopStmt
*Scop::getStmtFor(Region
*R
) const {
4875 ScopStmt
*Stmt
= getStmtFor(R
->getEntry());
4876 assert(!Stmt
|| Stmt
->getRegion() == R
);
4880 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
4881 if (!L
|| !R
.contains(L
))
4883 // outermostLoopInRegion always returns nullptr for top level regions
4884 if (R
.isTopLevelRegion()) {
4885 // LoopInfo's depths start at 1, we start at 0
4886 return L
->getLoopDepth() - 1;
4888 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
4890 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
4894 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
4895 for (auto &SAI
: arrays()) {
4896 if (SAI
->getName() == BaseName
)
4902 //===----------------------------------------------------------------------===//
4903 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
4904 AU
.addRequired
<LoopInfoWrapperPass
>();
4905 AU
.addRequired
<RegionInfoPass
>();
4906 AU
.addRequired
<DominatorTreeWrapperPass
>();
4907 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
4908 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
4909 AU
.addRequired
<AAResultsWrapperPass
>();
4910 AU
.addRequired
<AssumptionCacheTracker
>();
4911 AU
.setPreservesAll();
4914 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
) {
4915 NumLoopsInScop
+= Stats
.NumLoops
;
4917 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
4919 if (Stats
.MaxDepth
== 1)
4921 else if (Stats
.MaxDepth
== 2)
4923 else if (Stats
.MaxDepth
== 3)
4924 NumScopsDepthThree
++;
4925 else if (Stats
.MaxDepth
== 4)
4926 NumScopsDepthFour
++;
4927 else if (Stats
.MaxDepth
== 5)
4928 NumScopsDepthFive
++;
4930 NumScopsDepthLarger
++;
4933 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
4934 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
4936 if (!SD
.isMaxRegionInScop(*R
))
4939 Function
*F
= R
->getEntry()->getParent();
4940 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
4941 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
4942 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
4943 auto const &DL
= F
->getParent()->getDataLayout();
4944 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
4945 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
4947 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
);
4948 S
= SB
.getScop(); // take ownership of scop object
4951 ScopDetection::LoopStats Stats
=
4952 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
4953 updateLoopCountStatistic(Stats
);
4959 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
4963 OS
<< "Invalid Scop!\n";
// Pass identification, used by the LLVM pass infrastructure.
char ScopInfoRegionPass::ID = 0;

// Factory used by createScopInfoRegionPassPass() callers to instantiate the
// legacy-PM region pass.
Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
4970 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
4971 "Polly - Create polyhedral description of Scops", false,
4973 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
4974 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
4975 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
4976 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
4977 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
4978 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
4979 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
4980 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
4981 "Polly - Create polyhedral description of Scops", false,
4984 //===----------------------------------------------------------------------===//
4985 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
4986 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
4987 AssumptionCache
&AC
) {
4988 /// Create polyhedral description of scops for all the valid regions of a
4990 for (auto &It
: SD
) {
4991 Region
*R
= const_cast<Region
*>(It
);
4992 if (!SD
.isMaxRegionInScop(*R
))
4995 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
);
4996 std::unique_ptr
<Scop
> S
= SB
.getScop();
4999 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5000 assert(Inserted
&& "Building Scop for the same region twice!");
// Unique key identifying ScopInfoAnalysis to the new pass manager.
AnalysisKey ScopInfoAnalysis::Key;
5007 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5008 FunctionAnalysisManager
&FAM
) {
5009 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5010 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5011 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5012 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5013 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5014 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5015 auto &DL
= F
.getParent()->getDataLayout();
5016 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
};
5019 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5020 FunctionAnalysisManager
&FAM
) {
5021 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5022 for (auto &It
: SI
) {
5024 It
.second
->print(Stream
);
5026 Stream
<< "Invalid Scop!\n";
5028 return PreservedAnalyses::all();
5031 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5032 AU
.addRequired
<LoopInfoWrapperPass
>();
5033 AU
.addRequired
<RegionInfoPass
>();
5034 AU
.addRequired
<DominatorTreeWrapperPass
>();
5035 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5036 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5037 AU
.addRequired
<AAResultsWrapperPass
>();
5038 AU
.addRequired
<AssumptionCacheTracker
>();
5039 AU
.setPreservesAll();
5042 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5043 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5044 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5045 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5046 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5047 auto const &DL
= F
.getParent()->getDataLayout();
5048 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5049 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5051 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
});
5055 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5056 for (auto &It
: *Result
) {
5058 It
.second
->print(OS
);
5060 OS
<< "Invalid Scop!\n";
5064 char ScopInfoWrapperPass::ID
= 0;
5066 Pass
*polly::createScopInfoWrapperPassPass() {
5067 return new ScopInfoWrapperPass();
5070 INITIALIZE_PASS_BEGIN(
5071 ScopInfoWrapperPass
, "polly-function-scops",
5072 "Polly - Create polyhedral description of all Scops of a function", false,
5074 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5075 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5076 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5077 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5078 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5079 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5080 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5081 INITIALIZE_PASS_END(
5082 ScopInfoWrapperPass
, "polly-function-scops",
5083 "Polly - Create polyhedral description of all Scops of a function", false,