//===--------- ScopInfo.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Debug.h"

#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant loads assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
// The maximal number of basic sets we allow to be created during domain
// construction. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjuncts in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjuncts when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;
static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));
static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));
static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (do not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do not gist simplify sets accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
bool polly::UseInstructionNames;
static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));
//===----------------------------------------------------------------------===//

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules
// are empty.
static __isl_give isl_schedule *
combineInSequence(__isl_take isl_schedule *Prev,
                  __isl_take isl_schedule *Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return isl_schedule_sequence(Prev, Succ);
}
static __isl_give isl_set *addRangeBoundsToSet(__isl_take isl_set *S,
                                               const ConstantRange &Range,
                                               int dim,
                                               enum isl_dim_type type) {
  isl_val *V;
  isl_ctx *Ctx = isl_set_get_ctx(S);

  // The upper and lower bound for a parameter value is derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = isl_valFromAPInt(Ctx, Range.getSignedMin(), true);
  S = isl_set_lower_bound_val(S, type, dim, V);
  V = isl_valFromAPInt(Ctx, Range.getSignedMax(), true);
  S = isl_set_upper_bound_val(S, type, dim, V);

  if (Range.isFullSet())
    return S;

  if (isl_set_n_basic_set(S) > MaxDisjunctsInContext)
    return S;
  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
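  //
  // An illustrative example (assumed values, not from the original source):
  // on an i8 range such as [100, -120), the covered values are 100..127 and
  // -128..-121, so the union built below keeps { v : v >= 100 } union
  // { v : v <= -121 } instead of the single coarse interval [-128, 127].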
  if (Range.isSignWrappedSet()) {
    V = isl_valFromAPInt(Ctx, Range.getLower(), true);
    isl_set *SLB = isl_set_lower_bound_val(isl_set_copy(S), type, dim, V);

    V = isl_valFromAPInt(Ctx, Range.getUpper(), true);
    V = isl_val_sub_ui(V, 1);
    isl_set *SUB = isl_set_upper_bound_val(S, type, dim, V);
    S = isl_set_union(SLB, SUB);
  }

  return S;
}
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             MemoryKind::Array);
}
ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
  std::string BasePtrName =
      BaseName ? BaseName
               : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
                                      Kind == MemoryKind::PHI ? "__phi" : "",
                                      UseInstructionNames);
  Id = isl_id_alloc(Ctx, BasePtrName.c_str(), this);

  updateSizes(Sizes);

  if (!BasePtr || Kind != MemoryKind::Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}
__isl_give isl_space *ScopArrayInfo::getSpace() const {
  auto *Space =
      isl_space_set_alloc(isl_id_get_ctx(Id), 0, getNumberOfDimensions());
  Space = isl_space_set_tuple_id(Space, isl_dim_set, isl_id_copy(Id));
  return Space;
}
bool ScopArrayInfo::isReadOnly() {
  isl_union_set *WriteSet = isl_union_map_range(S.getWrites());
  isl_space *Space = getSpace();
  WriteSet = isl_union_set_intersect(
      WriteSet, isl_union_set_from_set(isl_set_universe(Space)));

  bool IsReadOnly = isl_union_set_is_empty(WriteSet);
  isl_union_set_free(WriteSet);

  return IsReadOnly;
}
bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
  if (Array->getElementType() != getElementType())
    return false;

  if (Array->getNumberOfDimensions() != getNumberOfDimensions())
    return false;

  for (unsigned i = 0; i < getNumberOfDimensions(); i++)
    if (Array->getDimensionSize(i) != getDimensionSize(i))
      return false;

  return true;
}
void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}
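// Illustration for updateElementType above (assumed sizes, not from the
// original source): merging a 64-bit element with a 32-bit access yields a
// 32-bit canonical element, while merging 64-bit and 48-bit accesses falls
// back to the GCD width and yields a 16-bit integer element.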
314 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
315 bool CheckConsistency
) {
316 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
317 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
318 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
320 if (CheckConsistency
) {
321 for (int i
= 0; i
< SharedDims
; i
++) {
322 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
323 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
324 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
328 if (DimensionSizes
.size() >= NewSizes
.size())
332 DimensionSizes
.clear();
333 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
335 for (isl_pw_aff
*Size
: DimensionSizesPw
)
336 isl_pw_aff_free(Size
);
337 DimensionSizesPw
.clear();
338 for (const SCEV
*Expr
: DimensionSizes
) {
340 DimensionSizesPw
.push_back(nullptr);
343 isl_pw_aff
*Size
= S
.getPwAffOnly(Expr
);
344 DimensionSizesPw
.push_back(Size
);
349 ScopArrayInfo::~ScopArrayInfo() {
351 for (isl_pw_aff
*Size
: DimensionSizesPw
)
352 isl_pw_aff_free(Size
);
355 std::string
ScopArrayInfo::getName() const { return isl_id_get_name(Id
); }
357 int ScopArrayInfo::getElemSizeInBytes() const {
358 return DL
.getTypeAllocSize(ElementType
);
361 __isl_give isl_id
*ScopArrayInfo::getBasePtrId() const {
362 return isl_id_copy(Id
);
365 void ScopArrayInfo::dump() const { print(errs()); }
367 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
368 OS
.indent(8) << *getElementType() << " " << getName();
370 if (getNumberOfDimensions() > 0 && !getDimensionSize(0)) {
374 for (; u
< getNumberOfDimensions(); u
++) {
378 auto *Size
= getDimensionSizePw(u
);
379 OS
<< " " << Size
<< " ";
380 isl_pw_aff_free(Size
);
382 OS
<< *getDimensionSize(u
);
390 if (BasePtrOriginSAI
)
391 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
393 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
396 const ScopArrayInfo
*
397 ScopArrayInfo::getFromAccessFunction(__isl_keep isl_pw_multi_aff
*PMA
) {
398 isl_id
*Id
= isl_pw_multi_aff_get_tuple_id(PMA
, isl_dim_out
);
399 assert(Id
&& "Output dimension didn't have an ID");
400 return getFromId(Id
);
403 const ScopArrayInfo
*ScopArrayInfo::getFromId(__isl_take isl_id
*Id
) {
404 void *User
= isl_id_get_user(Id
);
405 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
410 void MemoryAccess::wrapConstantDimensions() {
411 auto *SAI
= getScopArrayInfo();
412 auto *ArraySpace
= SAI
->getSpace();
413 auto *Ctx
= isl_space_get_ctx(ArraySpace
);
414 unsigned DimsArray
= SAI
->getNumberOfDimensions();
416 auto *DivModAff
= isl_multi_aff_identity(isl_space_map_from_domain_and_range(
417 isl_space_copy(ArraySpace
), isl_space_copy(ArraySpace
)));
418 auto *LArraySpace
= isl_local_space_from_space(ArraySpace
);
  // Begin with last dimension, to iteratively carry into higher dimensions.
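  //
  // An illustrative example (assumed shape, not from the original source):
  // for an array A[*][8], an index pair [i, j] is rewritten to
  // [i + floor(j / 8), j mod 8], so an access A[0][13] becomes A[1][5].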
421 for (int i
= DimsArray
- 1; i
> 0; i
--) {
422 auto *DimSize
= SAI
->getDimensionSize(i
);
423 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
425 // This transformation is not applicable to dimensions with dynamic size.
429 // This transformation is not applicable to dimensions of size zero.
430 if (DimSize
->isZero())
433 auto *DimSizeVal
= isl_valFromAPInt(Ctx
, DimSizeCst
->getAPInt(), false);
434 auto *Var
= isl_aff_var_on_domain(isl_local_space_copy(LArraySpace
),
436 auto *PrevVar
= isl_aff_var_on_domain(isl_local_space_copy(LArraySpace
),
439 // Compute: index % size
440 // Modulo must apply in the divide of the previous iteration, if any.
441 auto *Modulo
= isl_aff_copy(Var
);
442 Modulo
= isl_aff_mod_val(Modulo
, isl_val_copy(DimSizeVal
));
443 Modulo
= isl_aff_pullback_multi_aff(Modulo
, isl_multi_aff_copy(DivModAff
));
445 // Compute: floor(index / size)
447 Divide
= isl_aff_div(
449 isl_aff_val_on_domain(isl_local_space_copy(LArraySpace
), DimSizeVal
));
450 Divide
= isl_aff_floor(Divide
);
451 Divide
= isl_aff_add(Divide
, PrevVar
);
452 Divide
= isl_aff_pullback_multi_aff(Divide
, isl_multi_aff_copy(DivModAff
));
454 // Apply Modulo and Divide.
455 DivModAff
= isl_multi_aff_set_aff(DivModAff
, i
, Modulo
);
456 DivModAff
= isl_multi_aff_set_aff(DivModAff
, i
- 1, Divide
);
459 // Apply all modulo/divides on the accesses.
461 isl_map_apply_range(AccessRelation
, isl_map_from_multi_aff(DivModAff
));
462 AccessRelation
= isl_map_detect_equalities(AccessRelation
);
463 isl_local_space_free(LArraySpace
);
466 void MemoryAccess::updateDimensionality() {
467 auto *SAI
= getScopArrayInfo();
468 auto *ArraySpace
= SAI
->getSpace();
469 auto *AccessSpace
= isl_space_range(isl_map_get_space(AccessRelation
));
470 auto *Ctx
= isl_space_get_ctx(AccessSpace
);
472 auto DimsArray
= isl_space_dim(ArraySpace
, isl_dim_set
);
473 auto DimsAccess
= isl_space_dim(AccessSpace
, isl_dim_set
);
474 auto DimsMissing
= DimsArray
- DimsAccess
;
476 auto *BB
= getStatement()->getEntryBlock();
477 auto &DL
= BB
->getModule()->getDataLayout();
478 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
479 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
481 auto *Map
= isl_map_from_domain_and_range(
482 isl_set_universe(AccessSpace
),
483 isl_set_universe(isl_space_copy(ArraySpace
)));
485 for (unsigned i
= 0; i
< DimsMissing
; i
++)
486 Map
= isl_map_fix_si(Map
, isl_dim_out
, i
, 0);
488 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
489 Map
= isl_map_equate(Map
, isl_dim_in
, i
- DimsMissing
, isl_dim_out
, i
);
491 AccessRelation
= isl_map_apply_range(AccessRelation
, Map
);
493 // For the non delinearized arrays, divide the access function of the last
494 // subscript by the size of the elements in the array.
496 // A stride one array access in C expressed as A[i] is expressed in
497 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
498 // two subsequent values of 'i' index two values that are stored next to
499 // each other in memory. By this division we make this characteristic
500 // obvious again. If the base pointer was accessed with offsets not divisible
501 // by the accesses element size, we will have chosen a smaller ArrayElemSize
502 // that divides the offsets of all accesses to this base pointer.
503 if (DimsAccess
== 1) {
504 isl_val
*V
= isl_val_int_from_si(Ctx
, ArrayElemSize
);
505 AccessRelation
= isl_map_floordiv_val(AccessRelation
, V
);
508 // We currently do this only if we added at least one dimension, which means
509 // some dimension's indices have not been specified, an indicator that some
510 // index values have been added together.
511 // TODO: Investigate general usefulness; Effect on unit tests is to make index
512 // expressions more complicated.
514 wrapConstantDimensions();
517 computeBoundsOnAccessRelation(ArrayElemSize
);
519 // Introduce multi-element accesses in case the type loaded by this memory
520 // access is larger than the canonical element type of the array.
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3 }
524 if (ElemBytes
> ArrayElemSize
) {
525 assert(ElemBytes
% ArrayElemSize
== 0 &&
526 "Loaded element size should be multiple of canonical element size");
527 auto *Map
= isl_map_from_domain_and_range(
528 isl_set_universe(isl_space_copy(ArraySpace
)),
529 isl_set_universe(isl_space_copy(ArraySpace
)));
530 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
531 Map
= isl_map_equate(Map
, isl_dim_in
, i
, isl_dim_out
, i
);
536 LS
= isl_local_space_from_space(isl_map_get_space(Map
));
537 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
539 C
= isl_constraint_alloc_inequality(isl_local_space_copy(LS
));
540 C
= isl_constraint_set_constant_val(C
, isl_val_int_from_si(Ctx
, Num
- 1));
541 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, DimsArray
- 1, 1);
542 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, DimsArray
- 1, -1);
543 Map
= isl_map_add_constraint(Map
, C
);
545 C
= isl_constraint_alloc_inequality(LS
);
546 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, DimsArray
- 1, -1);
547 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, DimsArray
- 1, 1);
548 C
= isl_constraint_set_constant_val(C
, isl_val_int_from_si(Ctx
, 0));
549 Map
= isl_map_add_constraint(Map
, C
);
550 AccessRelation
= isl_map_apply_range(AccessRelation
, Map
);
553 isl_space_free(ArraySpace
);
557 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
559 case MemoryAccess::RT_NONE
:
560 llvm_unreachable("Requested a reduction operator string for a memory "
561 "access which isn't a reduction");
562 case MemoryAccess::RT_ADD
:
564 case MemoryAccess::RT_MUL
:
566 case MemoryAccess::RT_BOR
:
568 case MemoryAccess::RT_BXOR
:
570 case MemoryAccess::RT_BAND
:
573 llvm_unreachable("Unknown reduction type");
577 /// Return the reduction type for a given binary operator.
578 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
579 const Instruction
*Load
) {
581 return MemoryAccess::RT_NONE
;
582 switch (BinOp
->getOpcode()) {
583 case Instruction::FAdd
:
584 if (!BinOp
->hasUnsafeAlgebra())
585 return MemoryAccess::RT_NONE
;
587 case Instruction::Add
:
588 return MemoryAccess::RT_ADD
;
589 case Instruction::Or
:
590 return MemoryAccess::RT_BOR
;
591 case Instruction::Xor
:
592 return MemoryAccess::RT_BXOR
;
593 case Instruction::And
:
594 return MemoryAccess::RT_BAND
;
595 case Instruction::FMul
:
596 if (!BinOp
->hasUnsafeAlgebra())
597 return MemoryAccess::RT_NONE
;
599 case Instruction::Mul
:
600 if (DisableMultiplicativeReductions
)
601 return MemoryAccess::RT_NONE
;
602 return MemoryAccess::RT_MUL
;
604 return MemoryAccess::RT_NONE
;
608 MemoryAccess::~MemoryAccess() {
610 isl_set_free(InvalidDomain
);
611 isl_map_free(AccessRelation
);
612 isl_map_free(NewAccessRelation
);
615 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
616 isl_id
*ArrayId
= getArrayId();
617 void *User
= isl_id_get_user(ArrayId
);
618 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
619 isl_id_free(ArrayId
);
623 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
624 isl_id
*ArrayId
= getLatestArrayId();
625 void *User
= isl_id_get_user(ArrayId
);
626 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
627 isl_id_free(ArrayId
);
631 __isl_give isl_id
*MemoryAccess::getOriginalArrayId() const {
632 return isl_map_get_tuple_id(AccessRelation
, isl_dim_out
);
635 __isl_give isl_id
*MemoryAccess::getLatestArrayId() const {
636 if (!hasNewAccessRelation())
637 return getOriginalArrayId();
638 return isl_map_get_tuple_id(NewAccessRelation
, isl_dim_out
);
641 __isl_give isl_map
*MemoryAccess::getAddressFunction() const {
642 return isl_map_lexmin(getAccessRelation());
645 __isl_give isl_pw_multi_aff
*MemoryAccess::applyScheduleToAccessRelation(
646 __isl_take isl_union_map
*USchedule
) const {
647 isl_map
*Schedule
, *ScheduledAccRel
;
648 isl_union_set
*UDomain
;
650 UDomain
= isl_union_set_from_set(getStatement()->getDomain());
651 USchedule
= isl_union_map_intersect_domain(USchedule
, UDomain
);
652 Schedule
= isl_map_from_union_map(USchedule
);
653 ScheduledAccRel
= isl_map_apply_domain(getAddressFunction(), Schedule
);
654 return isl_pw_multi_aff_from_map(ScheduledAccRel
);
657 __isl_give isl_map
*MemoryAccess::getOriginalAccessRelation() const {
658 return isl_map_copy(AccessRelation
);
661 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
662 return stringFromIslObj(AccessRelation
);
665 __isl_give isl_space
*MemoryAccess::getOriginalAccessRelationSpace() const {
666 return isl_map_get_space(AccessRelation
);
669 __isl_give isl_map
*MemoryAccess::getNewAccessRelation() const {
670 return isl_map_copy(NewAccessRelation
);
673 std::string
MemoryAccess::getNewAccessRelationStr() const {
674 return stringFromIslObj(NewAccessRelation
);
677 __isl_give isl_basic_map
*
678 MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
679 isl_space
*Space
= isl_space_set_alloc(Statement
->getIslCtx(), 0, 1);
680 Space
= isl_space_align_params(Space
, Statement
->getDomainSpace());
682 return isl_basic_map_from_domain_and_range(
683 isl_basic_set_universe(Statement
->getDomainSpace()),
684 isl_basic_set_universe(Space
));
// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out of bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is larger than zero and strictly smaller than the
// size of the corresponding dimension). The only exception is the outermost
// dimension for which we do not need to assume any upper bound. At this point
// we formalize this assumption to ensure that at code generation time the
// relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out of bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out of bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure
// such statement instances are never executed.
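//
// A small illustration (assumed access, not from the original source): for a
// delinearized access A[i][j] to an array A[*][n], the locations outside the
// array bounds are { A[a][b] : b < 0 or b >= n }. Mapping them back through
// the access relation, intersecting with the statement domain and projecting
// onto the parameters yields the parameter values that could trigger such an
// access; the complement of that set is recorded as the INBOUNDS assumption.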
708 void MemoryAccess::assumeNoOutOfBound() {
709 if (PollyIgnoreInbounds
)
711 auto *SAI
= getScopArrayInfo();
712 isl_space
*Space
= isl_space_range(getOriginalAccessRelationSpace());
713 isl_set
*Outside
= isl_set_empty(isl_space_copy(Space
));
714 for (int i
= 1, Size
= isl_space_dim(Space
, isl_dim_set
); i
< Size
; ++i
) {
715 isl_local_space
*LS
= isl_local_space_from_space(isl_space_copy(Space
));
717 isl_pw_aff_var_on_domain(isl_local_space_copy(LS
), isl_dim_set
, i
);
718 isl_pw_aff
*Zero
= isl_pw_aff_zero_on_domain(LS
);
722 DimOutside
= isl_pw_aff_lt_set(isl_pw_aff_copy(Var
), Zero
);
723 isl_pw_aff
*SizeE
= SAI
->getDimensionSizePw(i
);
724 SizeE
= isl_pw_aff_add_dims(SizeE
, isl_dim_in
,
725 isl_space_dim(Space
, isl_dim_set
));
726 SizeE
= isl_pw_aff_set_tuple_id(SizeE
, isl_dim_in
,
727 isl_space_get_tuple_id(Space
, isl_dim_set
));
729 DimOutside
= isl_set_union(DimOutside
, isl_pw_aff_le_set(SizeE
, Var
));
731 Outside
= isl_set_union(Outside
, DimOutside
);
734 Outside
= isl_set_apply(Outside
, isl_map_reverse(getAccessRelation()));
735 Outside
= isl_set_intersect(Outside
, Statement
->getDomain());
736 Outside
= isl_set_params(Outside
);
  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always safe, but may make the resulting run-time check
  // bail out more often than strictly necessary.
742 Outside
= isl_set_remove_divs(Outside
);
743 Outside
= isl_set_complement(Outside
);
744 const auto &Loc
= getAccessInstruction()
745 ? getAccessInstruction()->getDebugLoc()
747 if (!PollyPreciseInbounds
)
748 Outside
= isl_set_gist(Outside
, isl_set_params(Statement
->getDomain()));
749 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
, Loc
,
751 isl_space_free(Space
);
754 void MemoryAccess::buildMemIntrinsicAccessRelation() {
755 assert(isMemoryIntrinsic());
756 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
758 auto *SubscriptPWA
= getPwAff(Subscripts
[0]);
759 auto *SubscriptMap
= isl_map_from_pw_aff(SubscriptPWA
);
762 if (Subscripts
[1] == nullptr) {
763 LengthMap
= isl_map_universe(isl_map_get_space(SubscriptMap
));
765 auto *LengthPWA
= getPwAff(Subscripts
[1]);
766 LengthMap
= isl_map_from_pw_aff(LengthPWA
);
767 auto *RangeSpace
= isl_space_range(isl_map_get_space(LengthMap
));
768 LengthMap
= isl_map_apply_range(LengthMap
, isl_map_lex_gt(RangeSpace
));
770 LengthMap
= isl_map_lower_bound_si(LengthMap
, isl_dim_out
, 0, 0);
771 LengthMap
= isl_map_align_params(LengthMap
, isl_map_get_space(SubscriptMap
));
773 isl_map_align_params(SubscriptMap
, isl_map_get_space(LengthMap
));
774 LengthMap
= isl_map_sum(LengthMap
, SubscriptMap
);
775 AccessRelation
= isl_map_set_tuple_id(LengthMap
, isl_dim_in
,
776 getStatement()->getDomainId());
779 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
780 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
782 auto MAI
= MemAccInst(getAccessInstruction());
783 if (isa
<MemIntrinsic
>(MAI
))
786 Value
*Ptr
= MAI
.getPointerOperand();
787 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
790 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
791 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
794 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
795 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
796 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
798 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
799 if (Range
.isFullSet())
802 if (Range
.isWrappedSet() | Range
.isSignWrappedSet())
805 bool isWrapping
= Range
.isSignWrappedSet();
807 unsigned BW
= Range
.getBitWidth();
808 const auto One
= APInt(BW
, 1);
809 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
810 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
812 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
813 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
815 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
817 isl_set
*AccessRange
= isl_map_range(isl_map_copy(AccessRelation
));
819 addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0, isl_dim_set
);
820 AccessRelation
= isl_map_intersect_range(AccessRelation
, AccessRange
);
823 void MemoryAccess::foldAccessRelation() {
824 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
827 int Size
= Subscripts
.size();
829 isl_map
*OldAccessRelation
= isl_map_copy(AccessRelation
);
831 for (int i
= Size
- 2; i
>= 0; --i
) {
833 isl_map
*MapOne
, *MapTwo
;
834 isl_pw_aff
*DimSize
= getPwAff(Sizes
[i
+ 1]);
836 isl_space
*SpaceSize
= isl_pw_aff_get_space(DimSize
);
837 isl_pw_aff_free(DimSize
);
838 isl_id
*ParamId
= isl_space_get_dim_id(SpaceSize
, isl_dim_param
, 0);
840 Space
= isl_map_get_space(AccessRelation
);
841 Space
= isl_space_map_from_set(isl_space_range(Space
));
842 Space
= isl_space_align_params(Space
, SpaceSize
);
844 int ParamLocation
= isl_space_find_dim_by_id(Space
, isl_dim_param
, ParamId
);
845 isl_id_free(ParamId
);
847 MapOne
= isl_map_universe(isl_space_copy(Space
));
848 for (int j
= 0; j
< Size
; ++j
)
849 MapOne
= isl_map_equate(MapOne
, isl_dim_in
, j
, isl_dim_out
, j
);
850 MapOne
= isl_map_lower_bound_si(MapOne
, isl_dim_in
, i
+ 1, 0);
852 MapTwo
= isl_map_universe(isl_space_copy(Space
));
853 for (int j
= 0; j
< Size
; ++j
)
854 if (j
< i
|| j
> i
+ 1)
855 MapTwo
= isl_map_equate(MapTwo
, isl_dim_in
, j
, isl_dim_out
, j
);
857 isl_local_space
*LS
= isl_local_space_from_space(Space
);
859 C
= isl_equality_alloc(isl_local_space_copy(LS
));
860 C
= isl_constraint_set_constant_si(C
, -1);
861 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, 1);
862 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, -1);
863 MapTwo
= isl_map_add_constraint(MapTwo
, C
);
864 C
= isl_equality_alloc(LS
);
865 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
+ 1, 1);
866 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
+ 1, -1);
867 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, ParamLocation
, 1);
868 MapTwo
= isl_map_add_constraint(MapTwo
, C
);
869 MapTwo
= isl_map_upper_bound_si(MapTwo
, isl_dim_in
, i
+ 1, -1);
871 MapOne
= isl_map_union(MapOne
, MapTwo
);
872 AccessRelation
= isl_map_apply_range(AccessRelation
, MapOne
);
875 isl_id
*BaseAddrId
= getScopArrayInfo()->getBasePtrId();
876 auto Space
= Statement
->getDomainSpace();
877 AccessRelation
= isl_map_set_tuple_id(
878 AccessRelation
, isl_dim_in
, isl_space_get_tuple_id(Space
, isl_dim_set
));
880 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
881 AccessRelation
= isl_map_gist_domain(AccessRelation
, Statement
->getDomain());
883 // Access dimension folding might in certain cases increase the number of
884 // disjuncts in the memory access, which can possibly complicate the generated
885 // run-time checks and can lead to costly compilation.
886 if (!PollyPreciseFoldAccesses
&& isl_map_n_basic_map(AccessRelation
) >
887 isl_map_n_basic_map(OldAccessRelation
)) {
888 isl_map_free(AccessRelation
);
889 AccessRelation
= OldAccessRelation
;
891 isl_map_free(OldAccessRelation
);
894 isl_space_free(Space
);
/// Check if @p Expr is divisible by @p Size.
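/// An illustrative example (not from the original source): with Size = 4, an
/// expression like (4 * %n + 8) is divisible because every operand of the add
/// is, whereas (4 * %n + 6) is not, since (6 udiv 4) * 4 != 6.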
898 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
903 // Only one factor needs to be divisible.
904 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
905 for (auto *FactorExpr
: MulExpr
->operands())
906 if (isDivisible(FactorExpr
, Size
, SE
))
911 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
913 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
914 for (auto *OpExpr
: NAryExpr
->operands())
915 if (!isDivisible(OpExpr
, Size
, SE
))
920 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
921 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
922 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
923 return MulSCEV
== Expr
;
926 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
927 assert(!AccessRelation
&& "AccessRelation already built");
929 // Initialize the invalid domain which describes all iterations for which the
930 // access relation is not modeled correctly.
931 auto *StmtInvalidDomain
= getStatement()->getInvalidDomain();
932 InvalidDomain
= isl_set_empty(isl_set_get_space(StmtInvalidDomain
));
933 isl_set_free(StmtInvalidDomain
);
935 isl_ctx
*Ctx
= isl_id_get_ctx(Id
);
936 isl_id
*BaseAddrId
= SAI
->getBasePtrId();
938 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
939 buildMemIntrinsicAccessRelation();
941 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
  // We overapproximate non-affine accesses with a possible access to the
  // whole array. For read accesses it does not make a difference if an
  // access must or may happen. However, for write accesses it is important to
  // differentiate between writes that must happen and writes that may happen.
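  //
  // As an illustrative example (not from the original source): a non-affine
  // subscript such as A[i * i] is modeled as a possible access to every
  // element of A, so a store through it is treated as a may-write rather
  // than a must-write, while a load simply may read all of A.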
951 AccessRelation
= isl_map_from_basic_map(createBasicAccessMap(Statement
));
954 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
958 isl_space
*Space
= isl_space_alloc(Ctx
, 0, Statement
->getNumIterators(), 0);
959 AccessRelation
= isl_map_universe(Space
);
961 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
962 isl_pw_aff
*Affine
= getPwAff(Subscripts
[i
]);
963 isl_map
*SubscriptMap
= isl_map_from_pw_aff(Affine
);
964 AccessRelation
= isl_map_flat_range_product(AccessRelation
, SubscriptMap
);
967 Space
= Statement
->getDomainSpace();
968 AccessRelation
= isl_map_set_tuple_id(
969 AccessRelation
, isl_dim_in
, isl_space_get_tuple_id(Space
, isl_dim_set
));
971 isl_map_set_tuple_id(AccessRelation
, isl_dim_out
, BaseAddrId
);
973 AccessRelation
= isl_map_gist_domain(AccessRelation
, Statement
->getDomain());
974 isl_space_free(Space
);
977 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
978 AccessType AccType
, Value
*BaseAddress
,
979 Type
*ElementType
, bool Affine
,
980 ArrayRef
<const SCEV
*> Subscripts
,
981 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
983 : Kind(Kind
), AccType(AccType
), RedType(RT_NONE
), Statement(Stmt
),
984 InvalidDomain(nullptr), BaseAddr(BaseAddress
), ElementType(ElementType
),
985 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
986 AccessValue(AccessValue
), IsAffine(Affine
),
987 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
988 NewAccessRelation(nullptr), FAD(nullptr) {
989 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
990 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
992 std::string IdName
= Stmt
->getBaseName() + Access
;
993 Id
= isl_id_alloc(Stmt
->getParent()->getIslCtx(), IdName
.c_str(), this);
996 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
,
997 __isl_take isl_map
*AccRel
)
998 : Kind(MemoryKind::Array
), AccType(AccType
), RedType(RT_NONE
),
999 Statement(Stmt
), InvalidDomain(nullptr), AccessInstruction(nullptr),
1000 IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel
),
1002 auto *ArrayInfoId
= isl_map_get_tuple_id(NewAccessRelation
, isl_dim_out
);
1003 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1004 Sizes
.push_back(nullptr);
1005 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1006 Sizes
.push_back(SAI
->getDimensionSize(i
));
1007 ElementType
= SAI
->getElementType();
1008 BaseAddr
= SAI
->getBasePtr();
1009 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1010 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1012 std::string IdName
= Stmt
->getBaseName() + Access
;
1013 Id
= isl_id_alloc(Stmt
->getParent()->getIslCtx(), IdName
.c_str(), this);
1016 void MemoryAccess::realignParams() {
1017 auto *Ctx
= Statement
->getParent()->getContext();
1018 InvalidDomain
= isl_set_gist_params(InvalidDomain
, isl_set_copy(Ctx
));
1019 AccessRelation
= isl_map_gist_params(AccessRelation
, Ctx
);
1022 const std::string
MemoryAccess::getReductionOperatorStr() const {
1023 return MemoryAccess::getReductionOperatorStr(getReductionType());
1026 __isl_give isl_id
*MemoryAccess::getId() const { return isl_id_copy(Id
); }
1028 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1029 MemoryAccess::ReductionType RT
) {
1030 if (RT
== MemoryAccess::RT_NONE
)
1033 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1037 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1039 void MemoryAccess::print(raw_ostream
&OS
) const {
1042 OS
.indent(12) << "ReadAccess :=\t";
1045 OS
.indent(12) << "MustWriteAccess :=\t";
1048 OS
.indent(12) << "MayWriteAccess :=\t";
1052 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1055 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1059 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1060 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1061 if (hasNewAccessRelation())
1062 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1065 void MemoryAccess::dump() const { print(errs()); }
1067 __isl_give isl_pw_aff
*MemoryAccess::getPwAff(const SCEV
*E
) {
1068 auto *Stmt
= getStatement();
1069 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1070 isl_set
*StmtDom
= isl_set_reset_tuple_id(getStatement()->getDomain());
1071 isl_set
*NewInvalidDom
= isl_set_intersect(StmtDom
, PWAC
.second
);
1072 InvalidDomain
= isl_set_union(InvalidDomain
, NewInvalidDom
);
// Create a map in the size of the provided set domain, that maps from one
// element of the provided set domain to another element of the provided
// set domain.
//
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strictly smaller
// than the last dimension of the output.
//
//   getEqualAndLarger(set[i0, i1, ..., iX]):
//
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
1088 static isl_map
*getEqualAndLarger(__isl_take isl_space
*setDomain
) {
1089 isl_space
*Space
= isl_space_map_from_set(setDomain
);
1090 isl_map
*Map
= isl_map_universe(Space
);
1091 unsigned lastDimension
= isl_map_dim(Map
, isl_dim_in
) - 1;
1093 // Set all but the last dimension to be equal for the input and output
1095 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1096 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1097 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1098 Map
= isl_map_equate(Map
, isl_dim_in
, i
, isl_dim_out
, i
);
  // Set the last dimension of the input to be strictly smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1104 Map
= isl_map_order_lt(Map
, isl_dim_in
, lastDimension
, isl_dim_out
,
1109 __isl_give isl_set
*
1110 MemoryAccess::getStride(__isl_take
const isl_map
*Schedule
) const {
1111 isl_map
*S
= const_cast<isl_map
*>(Schedule
);
1112 isl_map
*AccessRelation
= getAccessRelation();
1113 isl_space
*Space
= isl_space_range(isl_map_get_space(S
));
1114 isl_map
*NextScatt
= getEqualAndLarger(Space
);
1116 S
= isl_map_reverse(S
);
1117 NextScatt
= isl_map_lexmin(NextScatt
);
1119 NextScatt
= isl_map_apply_range(NextScatt
, isl_map_copy(S
));
1120 NextScatt
= isl_map_apply_range(NextScatt
, isl_map_copy(AccessRelation
));
1121 NextScatt
= isl_map_apply_domain(NextScatt
, S
);
1122 NextScatt
= isl_map_apply_domain(NextScatt
, AccessRelation
);
1124 isl_set
*Deltas
= isl_map_deltas(NextScatt
);
1128 bool MemoryAccess::isStrideX(__isl_take
const isl_map
*Schedule
,
1129 int StrideWidth
) const {
1130 isl_set
*Stride
, *StrideX
;
1133 Stride
= getStride(Schedule
);
1134 StrideX
= isl_set_universe(isl_set_get_space(Stride
));
1135 for (unsigned i
= 0; i
< isl_set_dim(StrideX
, isl_dim_set
) - 1; i
++)
1136 StrideX
= isl_set_fix_si(StrideX
, isl_dim_set
, i
, 0);
1137 StrideX
= isl_set_fix_si(StrideX
, isl_dim_set
,
1138 isl_set_dim(StrideX
, isl_dim_set
) - 1, StrideWidth
);
1139 IsStrideX
= isl_set_is_subset(Stride
, StrideX
);
1141 isl_set_free(StrideX
);
1142 isl_set_free(Stride
);
1147 bool MemoryAccess::isStrideZero(__isl_take
const isl_map
*Schedule
) const {
1148 return isStrideX(Schedule
, 0);
1151 bool MemoryAccess::isStrideOne(__isl_take
const isl_map
*Schedule
) const {
1152 return isStrideX(Schedule
, 1);
1155 void MemoryAccess::setAccessRelation(__isl_take isl_map
*NewAccess
) {
1156 isl_map_free(AccessRelation
);
1157 AccessRelation
= NewAccess
;
1160 void MemoryAccess::setNewAccessRelation(__isl_take isl_map
*NewAccess
) {
1164 // Check domain space compatibility.
1165 auto *NewSpace
= isl_map_get_space(NewAccess
);
1166 auto *NewDomainSpace
= isl_space_domain(isl_space_copy(NewSpace
));
1167 auto *OriginalDomainSpace
= getStatement()->getDomainSpace();
1168 assert(isl_space_has_equal_tuples(OriginalDomainSpace
, NewDomainSpace
));
1169 isl_space_free(NewDomainSpace
);
1170 isl_space_free(OriginalDomainSpace
);
1172 // Check whether there is an access for every statement instance.
1173 auto *StmtDomain
= getStatement()->getDomain();
1174 StmtDomain
= isl_set_intersect_params(
1175 StmtDomain
, getStatement()->getParent()->getContext());
1176 auto *NewDomain
= isl_map_domain(isl_map_copy(NewAccess
));
1177 assert(isl_set_is_subset(StmtDomain
, NewDomain
) &&
1178 "Partial accesses not supported");
1179 isl_set_free(NewDomain
);
1180 isl_set_free(StmtDomain
);
1182 auto *NewAccessSpace
= isl_space_range(NewSpace
);
1183 assert(isl_space_has_tuple_id(NewAccessSpace
, isl_dim_set
) &&
1184 "Must specify the array that is accessed");
1185 auto *NewArrayId
= isl_space_get_tuple_id(NewAccessSpace
, isl_dim_set
);
1186 auto *SAI
= static_cast<ScopArrayInfo
*>(isl_id_get_user(NewArrayId
));
1187 assert(SAI
&& "Must set a ScopArrayInfo");
1189 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1190 InvariantEquivClassTy
*EqClass
=
1191 getStatement()->getParent()->lookupInvariantEquivClass(
1194 "Access functions to indirect arrays must have an invariant and "
1195 "hoisted base pointer");
1198 // Check whether access dimensions correspond to number of dimensions of the
1200 auto Dims
= SAI
->getNumberOfDimensions();
1201 assert(isl_space_dim(NewAccessSpace
, isl_dim_set
) == Dims
&&
1202 "Access dims must match array dims");
1203 isl_space_free(NewAccessSpace
);
1204 isl_id_free(NewArrayId
);
1207 isl_map_free(NewAccessRelation
);
1208 NewAccessRelation
= NewAccess
;
1211 //===----------------------------------------------------------------------===//
1213 __isl_give isl_map
*ScopStmt::getSchedule() const {
1214 isl_set
*Domain
= getDomain();
1215 if (isl_set_is_empty(Domain
)) {
1216 isl_set_free(Domain
);
1217 return isl_map_from_aff(
1218 isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1220 auto *Schedule
= getParent()->getSchedule();
1222 isl_set_free(Domain
);
1225 Schedule
= isl_union_map_intersect_domain(
1226 Schedule
, isl_union_set_from_set(isl_set_copy(Domain
)));
1227 if (isl_union_map_is_empty(Schedule
)) {
1228 isl_set_free(Domain
);
1229 isl_union_map_free(Schedule
);
1230 return isl_map_from_aff(
1231 isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1233 auto *M
= isl_map_from_union_map(Schedule
);
1234 M
= isl_map_coalesce(M
);
1235 M
= isl_map_gist_domain(M
, Domain
);
1236 M
= isl_map_coalesce(M
);
1240 __isl_give isl_pw_aff
*ScopStmt::getPwAff(const SCEV
*E
, bool NonNegative
) {
1241 PWACtx PWAC
= getParent()->getPwAff(E
, getEntryBlock(), NonNegative
);
1242 InvalidDomain
= isl_set_union(InvalidDomain
, PWAC
.second
);
1246 void ScopStmt::restrictDomain(__isl_take isl_set
*NewDomain
) {
1247 assert(isl_set_is_subset(NewDomain
, Domain
) &&
1248 "New domain is not a subset of old domain!");
1249 isl_set_free(Domain
);
1253 void ScopStmt::buildAccessRelations() {
1254 Scop
&S
= *getParent();
1255 for (MemoryAccess
*Access
: MemAccs
) {
1256 Type
*ElementType
= Access
->getElementType();
1259 if (Access
->isPHIKind())
1260 Ty
= MemoryKind::PHI
;
1261 else if (Access
->isExitPHIKind())
1262 Ty
= MemoryKind::ExitPHI
;
1263 else if (Access
->isValueKind())
1264 Ty
= MemoryKind::Value
;
1266 Ty
= MemoryKind::Array
;
1268 auto *SAI
= S
.getOrCreateScopArrayInfo(Access
->getOriginalBaseAddr(),
1269 ElementType
, Access
->Sizes
, Ty
);
1270 Access
->buildAccessRelation(SAI
);
1274 MemoryAccess
*ScopStmt::lookupPHIReadOf(PHINode
*PHI
) const {
1275 for (auto *MA
: *this) {
1278 if (!MA
->isLatestAnyPHIKind())
1281 if (MA
->getAccessInstruction() == PHI
)
1287 void ScopStmt::addAccess(MemoryAccess
*Access
) {
1288 Instruction
*AccessInst
= Access
->getAccessInstruction();
1290 if (Access
->isArrayKind()) {
1291 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1292 MAL
.emplace_front(Access
);
1293 } else if (Access
->isValueKind() && Access
->isWrite()) {
1294 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1295 assert(Parent
.getStmtFor(AccessVal
) == this);
1296 assert(!ValueWrites
.lookup(AccessVal
));
1298 ValueWrites
[AccessVal
] = Access
;
1299 } else if (Access
->isValueKind() && Access
->isRead()) {
1300 Value
*AccessVal
= Access
->getAccessValue();
1301 assert(!ValueReads
.lookup(AccessVal
));
1303 ValueReads
[AccessVal
] = Access
;
1304 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1305 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1306 assert(!PHIWrites
.lookup(PHI
));
1308 PHIWrites
[PHI
] = Access
;
1311 MemAccs
.push_back(Access
);
1314 void ScopStmt::realignParams() {
1315 for (MemoryAccess
*MA
: *this)
1316 MA
->realignParams();
1318 auto *Ctx
= Parent
.getContext();
1319 InvalidDomain
= isl_set_gist_params(InvalidDomain
, isl_set_copy(Ctx
));
1320 Domain
= isl_set_gist_params(Domain
, Ctx
);
1323 /// Add @p BSet to the set @p User if @p BSet is bounded.
1324 static isl_stat
collectBoundedParts(__isl_take isl_basic_set
*BSet
,
1326 isl_set
**BoundedParts
= static_cast<isl_set
**>(User
);
1327 if (isl_basic_set_is_bounded(BSet
))
1328 *BoundedParts
= isl_set_union(*BoundedParts
, isl_set_from_basic_set(BSet
));
1330 isl_basic_set_free(BSet
);
1334 /// Return the bounded parts of @p S.
1335 static __isl_give isl_set
*collectBoundedParts(__isl_take isl_set
*S
) {
1336 isl_set
*BoundedParts
= isl_set_empty(isl_set_get_space(S
));
1337 isl_set_foreach_basic_set(S
, collectBoundedParts
, &BoundedParts
);
1339 return BoundedParts
;
1342 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1344 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1345 /// both with regards to the dimension @p Dim.
1346 static std::pair
<__isl_give isl_set
*, __isl_give isl_set
*>
1347 partitionSetParts(__isl_take isl_set
*S
, unsigned Dim
) {
1349 for (unsigned u
= 0, e
= isl_set_n_dim(S
); u
< e
; u
++)
1350 S
= isl_set_lower_bound_si(S
, isl_dim_set
, u
, 0);
1352 unsigned NumDimsS
= isl_set_n_dim(S
);
1353 isl_set
*OnlyDimS
= isl_set_copy(S
);
1355 // Remove dimensions that are greater than Dim as they are not interesting.
1356 assert(NumDimsS
>= Dim
+ 1);
1358 isl_set_project_out(OnlyDimS
, isl_dim_set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1360 // Create artificial parametric upper bounds for dimensions smaller than Dim
1361 // as we are not interested in them.
1362 OnlyDimS
= isl_set_insert_dims(OnlyDimS
, isl_dim_param
, 0, Dim
);
1363 for (unsigned u
= 0; u
< Dim
; u
++) {
1364 isl_constraint
*C
= isl_inequality_alloc(
1365 isl_local_space_from_space(isl_set_get_space(OnlyDimS
)));
1366 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, u
, 1);
1367 C
= isl_constraint_set_coefficient_si(C
, isl_dim_set
, u
, -1);
1368 OnlyDimS
= isl_set_add_constraint(OnlyDimS
, C
);
1371 // Collect all bounded parts of OnlyDimS.
1372 isl_set
*BoundedParts
= collectBoundedParts(OnlyDimS
);
1374 // Create the dimensions greater than Dim again.
1375 BoundedParts
= isl_set_insert_dims(BoundedParts
, isl_dim_set
, Dim
+ 1,
1376 NumDimsS
- Dim
- 1);
1378 // Remove the artificial upper bound parameters again.
1379 BoundedParts
= isl_set_remove_dims(BoundedParts
, isl_dim_param
, 0, Dim
);
1381 isl_set
*UnboundedParts
= isl_set_subtract(S
, isl_set_copy(BoundedParts
));
1382 return std::make_pair(UnboundedParts
, BoundedParts
);
1385 /// Set the dimension Ids from @p From in @p To.
1386 static __isl_give isl_set
*setDimensionIds(__isl_keep isl_set
*From
,
1387 __isl_take isl_set
*To
) {
1388 for (unsigned u
= 0, e
= isl_set_n_dim(From
); u
< e
; u
++) {
1389 isl_id
*DimId
= isl_set_get_dim_id(From
, isl_dim_set
, u
);
1390 To
= isl_set_set_dim_id(To
, isl_dim_set
, u
, DimId
);
1395 /// Create the conditions under which @p L @p Pred @p R is true.
1396 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1397 __isl_take isl_pw_aff
*L
,
1398 __isl_take isl_pw_aff
*R
) {
1400 case ICmpInst::ICMP_EQ
:
1401 return isl_pw_aff_eq_set(L
, R
);
1402 case ICmpInst::ICMP_NE
:
1403 return isl_pw_aff_ne_set(L
, R
);
1404 case ICmpInst::ICMP_SLT
:
1405 return isl_pw_aff_lt_set(L
, R
);
1406 case ICmpInst::ICMP_SLE
:
1407 return isl_pw_aff_le_set(L
, R
);
1408 case ICmpInst::ICMP_SGT
:
1409 return isl_pw_aff_gt_set(L
, R
);
1410 case ICmpInst::ICMP_SGE
:
1411 return isl_pw_aff_ge_set(L
, R
);
1412 case ICmpInst::ICMP_ULT
:
1413 return isl_pw_aff_lt_set(L
, R
);
1414 case ICmpInst::ICMP_UGT
:
1415 return isl_pw_aff_gt_set(L
, R
);
1416 case ICmpInst::ICMP_ULE
:
1417 return isl_pw_aff_le_set(L
, R
);
1418 case ICmpInst::ICMP_UGE
:
1419 return isl_pw_aff_ge_set(L
, R
);
1421 llvm_unreachable("Non integer predicate not supported");
1425 /// Create the conditions under which @p L @p Pred @p R is true.
1427 /// Helper function that will make sure the dimensions of the result have the
1428 /// same isl_id's as the @p Domain.
1429 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1430 __isl_take isl_pw_aff
*L
,
1431 __isl_take isl_pw_aff
*R
,
1432 __isl_keep isl_set
*Domain
) {
1433 isl_set
*ConsequenceCondSet
= buildConditionSet(Pred
, L
, R
);
1434 return setDimensionIds(Domain
, ConsequenceCondSet
);
1437 /// Build the conditions sets for the switch @p SI in the @p Domain.
1439 /// This will fill @p ConditionSets with the conditions under which control
1440 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1441 /// have as many elements as @p SI has successors.
1443 buildConditionSets(ScopStmt
&Stmt
, SwitchInst
*SI
, Loop
*L
,
1444 __isl_keep isl_set
*Domain
,
1445 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1447 Value
*Condition
= getConditionFromTerminator(SI
);
1448 assert(Condition
&& "No condition for switch");
1450 Scop
&S
= *Stmt
.getParent();
1451 ScalarEvolution
&SE
= *S
.getSE();
1452 isl_pw_aff
*LHS
, *RHS
;
1453 LHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(Condition
, L
));
1455 unsigned NumSuccessors
= SI
->getNumSuccessors();
1456 ConditionSets
.resize(NumSuccessors
);
1457 for (auto &Case
: SI
->cases()) {
1458 unsigned Idx
= Case
.getSuccessorIndex();
1459 ConstantInt
*CaseValue
= Case
.getCaseValue();
1461 RHS
= Stmt
.getPwAff(SE
.getSCEV(CaseValue
));
1462 isl_set
*CaseConditionSet
=
1463 buildConditionSet(ICmpInst::ICMP_EQ
, isl_pw_aff_copy(LHS
), RHS
, Domain
);
1464 ConditionSets
[Idx
] = isl_set_coalesce(
1465 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1468 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1469 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1470 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1472 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1473 ConditionSets
[0] = setDimensionIds(
1474 Domain
, isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
));
1476 isl_pw_aff_free(LHS
);
/// Build the condition sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
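///
/// An illustrative example (not from the original source): for a branch on
/// "(i < N) and (j > 5)" the two operand condition sets are built recursively
/// and intersected for the 'and' (they would be unioned for an 'or'), giving
/// { [i,j] : i < N and j > 5 } as the consequence set and its complement
/// within the domain as the alternative set.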
1490 buildConditionSets(ScopStmt
&Stmt
, Value
*Condition
, TerminatorInst
*TI
,
1491 Loop
*L
, __isl_keep isl_set
*Domain
,
1492 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1494 Scop
&S
= *Stmt
.getParent();
1495 isl_set
*ConsequenceCondSet
= nullptr;
1496 if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1497 if (CCond
->isZero())
1498 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1500 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1501 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1502 auto Opcode
= BinOp
->getOpcode();
1503 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1505 bool Valid
= buildConditionSets(Stmt
, BinOp
->getOperand(0), TI
, L
, Domain
,
1507 buildConditionSets(Stmt
, BinOp
->getOperand(1), TI
, L
, Domain
,
1510 while (!ConditionSets
.empty())
1511 isl_set_free(ConditionSets
.pop_back_val());
1515 isl_set_free(ConditionSets
.pop_back_val());
1516 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1517 isl_set_free(ConditionSets
.pop_back_val());
1518 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1520 if (Opcode
== Instruction::And
)
1521 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1523 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1525 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1527 "Condition of exiting branch was neither constant nor ICmp!");
1529 ScalarEvolution
&SE
= *S
.getSE();
1530 isl_pw_aff
*LHS
, *RHS
;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
1534 bool NonNeg
= ICond
->isUnsigned();
1535 LHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(ICond
->getOperand(0), L
), NonNeg
);
1536 RHS
= Stmt
.getPwAff(SE
.getSCEVAtScope(ICond
->getOperand(1), L
), NonNeg
);
1537 ConsequenceCondSet
=
1538 buildConditionSet(ICond
->getPredicate(), LHS
, RHS
, Domain
);
1541 // If no terminator was given we are only looking for parameter constraints
1542 // under which @p Condition is true/false.
1544 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1545 assert(ConsequenceCondSet
);
1546 ConsequenceCondSet
= isl_set_coalesce(
1547 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1549 isl_set
*AlternativeCondSet
= nullptr;
1551 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1554 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1555 isl_set_copy(ConsequenceCondSet
));
1557 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1561 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc());
1562 isl_set_free(AlternativeCondSet
);
1563 isl_set_free(ConsequenceCondSet
);
1567 ConditionSets
.push_back(ConsequenceCondSet
);
1568 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1573 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1575 /// This will fill @p ConditionSets with the conditions under which control
1576 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1577 /// have as many elements as @p TI has successors.
1579 buildConditionSets(ScopStmt
&Stmt
, TerminatorInst
*TI
, Loop
*L
,
1580 __isl_keep isl_set
*Domain
,
1581 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1583 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1584 return buildConditionSets(Stmt
, SI
, L
, Domain
, ConditionSets
);
1586 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1588 if (TI
->getNumSuccessors() == 1) {
1589 ConditionSets
.push_back(isl_set_copy(Domain
));
1593 Value
*Condition
= getConditionFromTerminator(TI
);
1594 assert(Condition
&& "No condition for Terminator");
1596 return buildConditionSets(Stmt
, Condition
, TI
, L
, Domain
, ConditionSets
);
void ScopStmt::buildDomain() {
  isl_id *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);

  Domain = getParent()->getDomainConditions(this);
  Domain = isl_set_set_tuple_id(Domain, Id);
}

void ScopStmt::collectSurroundingLoops() {
  for (unsigned u = 0, e = isl_set_n_dim(Domain); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(Domain, isl_dim_set, u);
    NestLoops.push_back(static_cast<Loop *>(isl_id_get_user(DimId)));
    isl_id_free(DimId);
  }
}
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(nullptr),
      R(&R), Build(nullptr), SurroundingLoop(SurroundingLoop) {

  BaseName = getIslCompatibleName(
      "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
}
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      R(nullptr), Build(nullptr), SurroundingLoop(SurroundingLoop) {

  BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), "",
                                  UseInstructionNames);
}
ScopStmt::ScopStmt(Scop &parent, __isl_take isl_map *SourceRel,
                   __isl_take isl_map *TargetRel, __isl_take isl_set *NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain), BB(nullptr),
      R(nullptr), Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  auto *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
  Domain = isl_set_set_tuple_id(Domain, isl_id_copy(Id));
  TargetRel = isl_map_set_tuple_id(TargetRel, isl_dim_in, Id);
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = isl_map_set_tuple_id(SourceRel, isl_dim_in, isl_id_copy(Id));
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}
void ScopStmt::init(LoopInfo &LI) {
  assert(!Domain && "init must be called only once");

  buildDomain();
  collectSurroundingLoops();
  buildAccessRelations();

  if (DetectReductions)
    checkForReductions();
}
/// Collect loads which might form a reduction chain with @p StoreMA.
///
/// Check if the stored value for @p StoreMA is a binary operator with one or
/// two loads as operands. If the binary operand is commutative & associative,
/// used only once (by @p StoreMA) and its load operands are also used only
/// once, we have found a possible reduction chain. It starts at an operand
/// load and includes the binary operator and @p StoreMA.
///
/// Note: We allow only one use to ensure the load and binary operator cannot
/// escape this block or into any other store except @p StoreMA.
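///
/// For example (a minimal sketch, not taken from a specific test case), a
/// statement of the form
///
///   %tmp  = load A[i]        ; load with a single use
///   %sum  = add %tmp, %x     ; commutative & associative, single use
///   store %sum, A[i]         ; @p StoreMA
///
/// yields the candidate chain (load of A[i], add, store to A[i]); whether it
/// is actually marked as a reduction is decided later in checkForReductions.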
void ScopStmt::collectCandiateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&getArrayAccessFor(PossibleLoad1));
}
/// Check for reductions in this ScopStmt.
///
/// Iterate over all store memory accesses and check for valid binary reduction
/// like chains. For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reduction candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
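///
/// For illustration (sketch): for the candidate chain around 'A[i] += B[i]'
/// the load and store both access A[i], so their access spaces match. Any
/// further access to A in the same statement, e.g. 'A[i-1]', would typically
/// make the intersection with the candidate's accessed elements non-empty,
/// and the pair would not be marked as reduction-like.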
void ScopStmt::checkForReductions() {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : MemAccs) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
    isl_map *StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
      isl_map_free(LoadAccs);
      isl_map_free(StoreAccs);
      continue;
    }

    // And check if the remaining accesses overlap with other memory accesses.
    isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
    AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
    isl_set *AllAccs = isl_map_range(AllAccsRel);

    for (MemoryAccess *MA : MemAccs) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl_map *AccRel =
          isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
      isl_set *Accs = isl_map_range(AccRel);

      if (isl_set_has_equal_space(AllAccs, Accs)) {
        isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
        Valid = Valid && isl_set_is_empty(OverlapAccs);
        isl_set_free(OverlapAccs);
      } else {
        isl_set_free(Accs);
      }
    }

    isl_set_free(AllAccs);
    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }

std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule();
  if (!S)
    return "";
  auto Str = stringFromIslObj(S);
  isl_map_free(S);
  return Str;
}

void ScopStmt::setInvalidDomain(__isl_take isl_set *ID) {
  isl_set_free(InvalidDomain);
  InvalidDomain = ID;
}

BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

__isl_give isl_set *ScopStmt::getDomain() const { return isl_set_copy(Domain); }

__isl_give isl_space *ScopStmt::getDomainSpace() const {
  return isl_set_get_space(Domain);
}

__isl_give isl_id *ScopStmt::getDomainId() const {
  return isl_set_get_tuple_id(Domain);
}

ScopStmt::~ScopStmt() {
  isl_set_free(Domain);
  isl_set_free(InvalidDomain);
}

void ScopStmt::print(raw_ostream &OS) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);
}

void ScopStmt::dump() const { print(dbgs()); }
void ScopStmt::removeAccessData(MemoryAccess *MA) {
  if (MA->isRead() && MA->isOriginalValueKind()) {
    bool Found = ValueReads.erase(MA->getAccessValue());
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalValueKind()) {
    bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
}
void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
  // Remove the memory accesses from this statement together with all scalar
  // accesses that were caused by it. MemoryKind::Value READs have no access
  // instruction, hence would not be removed by this function. However, it is
  // only used for invariant LoadInst accesses, its arguments are always affine,
  // hence synthesizable, and therefore there are no MemoryKind::Value READ
  // accesses to be removed.
  auto Predicate = [&](MemoryAccess *Acc) {
    return Acc->getAccessInstruction() == MA->getAccessInstruction();
  };
  for (auto *MA : MemAccs) {
    if (Predicate(MA))
      removeAccessData(MA);
  }
  MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
                MemAccs.end());
  InstructionToAccess.erase(MA->getAccessInstruction());
}
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
  assert(MAIt != MemAccs.end());
  MemAccs.erase(MAIt);

  removeAccessData(MA);

  auto It = InstructionToAccess.find(MA->getAccessInstruction());
  if (It != InstructionToAccess.end()) {
    It->second.remove(MA);
    if (It->second.empty())
      InstructionToAccess.erase(MA->getAccessInstruction());
  }
}
//===----------------------------------------------------------------------===//
/// Scop class implementation

void Scop::setContext(__isl_take isl_set *NewContext) {
  NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
  isl_set_free(Context);
  Context = NewContext;
}
/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
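///
/// For example (a sketch with made-up value names): if the invariant load
/// '%val = load %p' is mapped to a preloaded value '%pre' in the VMap, a
/// SCEV such as {%val,+,4}<%L> is rewritten to '%pre + {0,+,4}<%L>'. The
/// parameter value changes while the AddRec still refers to the original
/// loop %L, which is what visitAddRecExpr below ensures.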
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(ValueToValueMap &VMap, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) {
  return SCEVSensitiveParameterRewriter::rewrite(S, *SE, InvEquivClassVMap);
}
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (UseInstructionNames) {
    if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
      Value *Val = ValueParameter->getValue();

      // If this parameter references a specific Value and this value has a name
      // we use this name as it is likely to be unique and more useful than just
      // a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }
  }

  ParameterName = getIslCompatibleName("", ParameterName, "");

  auto *Id = isl_id_alloc(getIslCtx(), ParameterName.c_str(),
                          const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}
void Scop::addParams(const ParameterSetTy &NewParameters) {
  for (const SCEV *Parameter : NewParameters) {
    // Normalize the SCEV to get the representing element for an invariant load.
    Parameter = extractConstantFactor(Parameter, *SE).second;
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);

    if (Parameters.insert(Parameter))
      createParameterId(Parameter);
  }
}

__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return isl_id_copy(ParameterIds.lookup(Parameter));
}
__isl_give isl_set *
Scop::addNonEmptyDomainConstraints(__isl_take isl_set *C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains());
  return isl_set_intersect_params(C, DomainContext);
}

bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}
void Scop::addUserAssumptions(AssumptionCache &AC, DominatorTree &DT,
                              LoopInfo &LI) {
  auto &F = getFunction();
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    bool InScop = contains(CI);
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F,
                                     CI->getDebugLoc(),
                                     "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    auto &Stmt = InScop ? *getStmtFor(CI->getParent()) : *Stmts.begin();
    auto *Dom = InScop ? getDomainConditions(&Stmt) : isl_set_copy(Context);
    bool Valid = buildConditionSets(Stmt, Val, TI, L, Dom, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    emitOptimizationRemarkAnalysis(
        F.getContext(), DEBUG_TYPE, F, CI->getDebugLoc(),
        "Use user assumption: " + stringFromIslObj(AssumptionCtx));
    Context = isl_set_intersect(Context, AssumptionCtx);
  }
}
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
  isl_space *Space = getParamSpace();
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context does not have "
           << "the same number of dimensions as the computed context. Due to "
           << "this mismatch, the -polly-context option is ignored. Please "
           << "provide the context in the parameter space: " << SpaceStr
           << ".\n";
    free(SpaceStr);
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);

    if (strcmp(NameContext, NameUserContext) != 0) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    UserContext =
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
                           isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = isl_set_intersect(Context, UserContext);
  isl_space_free(Space);
}
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
void Scop::buildContext() {
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
  Context = isl_set_universe(isl_space_copy(Space));
  InvalidContext = isl_set_empty(isl_space_copy(Space));
  AssumedContext = isl_set_universe(Space);
}

void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    Context = addRangeBoundsToSet(Context, SRange, PDim++, isl_dim_param);
  }
}
void Scop::realignParams() {
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl_space *Space = isl_space_params_alloc(getIslCtx(), ParameterIds.size());

  unsigned PDim = 0;
  for (const auto *Parameter : Parameters) {
    isl_id *id = getIdForParam(Parameter);
    Space = isl_space_set_dim_id(Space, isl_dim_param, PDim++, id);
  }

  // Align the parameters of all data structures to the model.
  Context = isl_set_align_params(Context, Space);

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();

  // Simplify the schedule according to the context too.
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext());
}
static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    isl_set *DomainParameters = isl_union_set_params(S.getDomains());
    AssumptionContext =
        isl_set_gist_params(AssumptionContext, DomainParameters);
  }

  AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext());
  return AssumptionContext;
}

void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext = isl_set_align_params(InvalidContext, getParamSpace());
}
/// Add the minimal/maximal access in @p Set to @p User.
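///
/// For illustration (a sketch with hypothetical values): if @p Set contains
/// the elements of A accessed as A[i + 1] over { Stmt[i] : 0 <= i < n }, the
/// pair appended to the MinMaxVectorTy roughly maps the parameters to the
/// smallest and the (one past the) largest accessed element, i.e. A[1] and
/// A[n + 1], so that run-time alias checks can compare whole address ranges.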
static isl_stat buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
  Scop::MinMaxVectorTy *MinMaxAccesses = (Scop::MinMaxVectorTy *)User;
  isl_pw_multi_aff *MinPMA, *MaxPMA;
  isl_pw_aff *LastDimAff;
  isl_aff *OneAff;
  unsigned Pos;

  Set = isl_set_remove_divs(Set);

  if (isl_set_n_basic_set(Set) >= MaxDisjunctsInDomain) {
    isl_set_free(Set);
    return isl_stat_error;
  }

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //
  if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++)
      if (isl_set_involves_dims(Set, isl_dim_param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters) {
      isl_set_free(Set);
      return isl_stat_error;
    }
  }

  MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
  MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));

  MinPMA = isl_pw_multi_aff_coalesce(MinPMA);
  MaxPMA = isl_pw_multi_aff_coalesce(MaxPMA);

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) &&
         "Assumed at least one output dimension");
  Pos = isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) - 1;
  LastDimAff = isl_pw_multi_aff_get_pw_aff(MaxPMA, Pos);
  OneAff = isl_aff_zero_on_domain(
      isl_local_space_from_space(isl_pw_aff_get_domain_space(LastDimAff)));
  OneAff = isl_aff_add_constant_si(OneAff, 1);
  LastDimAff = isl_pw_aff_add(LastDimAff, isl_pw_aff_from_aff(OneAff));
  MaxPMA = isl_pw_multi_aff_set_pw_aff(MaxPMA, Pos, LastDimAff);

  MinMaxAccesses->push_back(std::make_pair(MinPMA, MaxPMA));

  isl_set_free(Set);
  return isl_stat_ok;
}
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}
/// Wrapper function to calculate minimal/maximal accesses to each array.
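///
/// Sketch of the overall flow: the access relations of one alias group are
/// unioned, restricted to the statement domains, and the range of that union
/// (the set of accessed array elements) is handed to isl, which invokes
/// buildMinMaxAccess once per array space so that each array in the group
/// contributes one (min, max) pair.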
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {

  MinMaxAccesses.reserve(AliasGroup.size());

  isl_union_set *Domains = S.getDomains();
  isl_union_map *Accesses = isl_union_map_empty(S.getParamSpace());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = isl_union_map_add_map(Accesses, MA->getAccessRelation());

  Accesses = isl_union_map_intersect_domain(Accesses, Domains);
  isl_union_set *Locations = isl_union_map_range(Accesses);
  Locations = isl_union_set_coalesce(Locations);
  Locations = isl_union_set_detect_equalities(Locations);
  bool Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
                                               &MinMaxAccesses));
  isl_union_set_free(Locations);
  return Valid;
}
/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (<run-time check fails>)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();

  return L;
}
/// Get the number of blocks in @p L.
///
/// The number of blocks in a loop are the number of basic blocks actually
/// belonging to the loop, as well as all single basic blocks that the loop
/// exits to and which terminate in an unreachable instruction. We do not
/// allow such basic blocks in the exit of a scop, hence they belong to the
/// scop and represent run-time conditions which we want to model and
/// subsequently speculate away.
///
/// @see getRegionNodeLoop for additional details.
long getNumBlocksInLoop(Loop *L) {
  long NumBlocks = L->getNumBlocks();
  SmallVector<llvm::BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  for (auto ExitBlock : ExitBlocks) {
    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
      NumBlocks++;
  }
  return NumBlocks;
}

static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
  if (!RN->isSubRegion())
    return 1;

  Region *R = RN->getNodeAs<Region>();
  return std::distance(R->block_begin(), R->block_end());
}
static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}

///}
static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
                                                 unsigned Dim, Loop *L) {
  Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
  isl_id *DimId =
      isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
  return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
}
__isl_give isl_set *Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}

__isl_give isl_set *Scop::getDomainConditions(BasicBlock *BB) const {
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return isl_set_copy(DIt->getSecond());

  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}
bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI) {

  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));

  while (LD-- >= 0) {
    S = addDomainDimId(S, LD + 1, L);
    L = L->getParentLoop();
  }

  // Initialize the invalid domain.
  auto *EntryStmt = getStmtFor(EntryBB);
  EntryStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(S)));

  DomainMap[EntryBB] = S;

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI))
    return false;

  if (!propagateDomainConstraints(R, DT, LI))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI))
    return false;

  return true;
}
/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible to domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
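///
/// For illustration (sketch): when control moves from the body of a two-deep
/// loop nest [i, j] into code surrounded only by the outer loop, a domain
/// such as { [i, j] : 0 <= i < n and 0 <= j < m } has its inner dimension
/// projected out, leaving constraints only on i; conversely, entering an
/// inner loop appends a fresh, still unconstrained dimension for the newly
/// entered loop.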
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
                                                  __isl_take isl_set *Dom,
                                                  Loop *OldL, Loop *NewL) {

  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else {
    assert(OldDepth > NewDepth);
    int Diff = OldDepth - NewDepth;
    int NumDim = isl_set_n_dim(Dom);
    assert(NumDim >= Diff);
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
  }

  return Dom;
}
bool Scop::propagateInvalidStmtDomains(Region *R, DominatorTree &DT,
                                       LoopInfo &LI) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, DT, LI);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    ScopStmt *Stmt = getStmtFor(BB);
    isl_set *&Domain = DomainMap[BB];
    assert(Domain && "Cannot propagate a nullptr");

    auto *InvalidDomain = Stmt->getInvalidDomain();
    bool IsInvalidBlock =
        ContainsErrorBlock || isl_set_is_subset(Domain, InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = isl_set_intersect(InvalidDomain, isl_set_copy(Domain));
    } else {
      isl_set_free(InvalidDomain);
      InvalidDomain = Domain;
      isl_set *DomPar = isl_set_params(isl_set_copy(Domain));
      recordAssumption(ERRORBLOCK, DomPar, BB->getTerminator()->getDebugLoc(),
                       AS_RESTRICTION);
      Domain = nullptr;
    }

    if (isl_set_is_empty(InvalidDomain)) {
      Stmt->setInvalidDomain(InvalidDomain);
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
      auto *SuccStmt = getStmtFor(SuccBB);

      // Skip successors outside the SCoP.
      if (!SuccStmt)
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      auto *SuccBBLoop = SuccStmt->getSurroundingLoop();
      auto *AdjustedInvalidDomain = adjustDomainDimensions(
          *this, isl_set_copy(InvalidDomain), BBLoop, SuccBBLoop);
      auto *SuccInvalidDomain = SuccStmt->getInvalidDomain();
      SuccInvalidDomain =
          isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
      SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
      unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);
      SuccStmt->setInvalidDomain(SuccInvalidDomain);

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (NumConjucts < MaxDisjunctsInDomain)
        continue;

      isl_set_free(InvalidDomain);
      invalidate(COMPLEXITY, TI->getDebugLoc());
      return false;
    }

    Stmt->setInvalidDomain(InvalidDomain);
  }

  return true;
}
void Scop::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI) {

  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = R.getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  auto *Domain = DomainMap[BB];
  assert(Domain && "Cannot propagate a nullptr");

  auto *ExitStmt = getStmtFor(ExitBB);
  auto *ExitBBLoop = ExitStmt->getSurroundingLoop();

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  auto *AdjustedDomain =
      adjustDomainDimensions(*this, isl_set_copy(Domain), BBLoop, ExitBBLoop);
  auto *&ExitDomain = DomainMap[ExitBB];

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain =
      ExitDomain ? isl_set_union(AdjustedDomain, ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  ExitStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(ExitDomain)));

  FinishedExitBlocks.insert(ExitBB);
}
bool Scop::buildDomainsWithBranchConstraints(Region *R, DominatorTree &DT,
                                             LoopInfo &LI) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.
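  //
  // For illustration (sketch): for a simple if-then-else diamond
  //
  //     Entry -> Then / Else -> Join
  //
  // the entry domain D is split by the branch condition c into D & {c} for
  // Then and D & {!c} for Else; Join then receives the union of both, which
  // coalesces back to D.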
  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, getRegion(), LI, DT))
      HasErrorBlock = true;

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    TerminatorInst *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    isl_set *Domain = DomainMap.lookup(BB);
    if (!Domain)
      continue;
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(isl_set_copy(Domain));
    else if (!buildConditionSets(*getStmtFor(BB), TI, BBLoop, Domain,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl_set *CondSet = ConditionSets[u];
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      auto *SuccStmt = getStmtFor(SuccBB);
      // Skip blocks outside the region.
      if (!SuccStmt) {
        isl_set_free(CondSet);
        continue;
      }

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB)) {
        isl_set_free(CondSet);
        continue;
      }

      // Skip back edges.
      if (DT.dominates(SuccBB, BB)) {
        isl_set_free(CondSet);
        continue;
      }

      auto *SuccBBLoop = SuccStmt->getSurroundingLoop();
      CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl_set *&SuccDomain = DomainMap[SuccBB];

      if (SuccDomain) {
        SuccDomain = isl_set_coalesce(isl_set_union(SuccDomain, CondSet));
      } else {
        // Initialize the invalid domain.
        SuccStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(CondSet)));
        SuccDomain = CondSet;
      }

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (isl_set_n_basic_set(SuccDomain) < MaxDisjunctsInDomain)
        continue;

      invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
__isl_give isl_set *
Scop::getPredecessorDomainConstraints(BasicBlock *BB,
                                      __isl_keep isl_set *Domain,
                                      DominatorTree &DT, LoopInfo &LI) {
  // If @p BB is the ScopEntry we are done
  if (R.getEntry() == BB)
    return isl_set_universe(isl_set_get_space(Domain));

  // The region info of this function.
  auto &RI = *R.getRegionInfo();

  auto *BBLoop = getStmtFor(BB)->getSurroundingLoop();

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain));

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
                    PredBBInRegion))
      continue;

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    auto *PredBBDom = getDomainConditions(PredBB);
    auto *PredBBLoop = getStmtFor(PredBB)->getSurroundingLoop();
    PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);

    PredDom = isl_set_union(PredDom, PredBBDom);
  }

  return PredDom;
}
bool Scop::propagateDomainConstraints(Region *R, DominatorTree &DT,
                                      LoopInfo &LI) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, DT, LI))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl_set *&Domain = DomainMap[BB];

    // Under the union of all predecessor conditions we can reach this block.
    auto *PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
    Domain = isl_set_coalesce(isl_set_intersect(Domain, PredDom));
    Domain = isl_set_align_params(Domain, getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI))
        return false;
  }

  return true;
}
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
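///
/// For illustration (sketch): applied to a header domain with isl_set_apply,
/// the map shifts the loop dimension forward, e.g. the set
///   { [i] : 0 <= i <= c }   applied to   { [i] -> [i + 1] }
/// becomes { [i] : 1 <= i <= c + 1 }, which addLoopBoundsToHeaderDomain below
/// uses to describe the iterations reached via the loop backedge.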
static __isl_give isl_map *
createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
  auto *MapSpace = isl_space_map_from_set(SetSpace);
  auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
  for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
    if (u != Dim)
      NextIterationMap =
          isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
  auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
  C = isl_constraint_set_constant_si(C, 1);
  C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
  C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
  NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
  return NextIterationMap;
}
bool Scop::addLoopBoundsToHeaderDomain(Loop *L, LoopInfo &LI) {
  int LoopDepth = getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(DomainMap.count(HeaderBB));
  isl_set *&HeaderBBDom = DomainMap[HeaderBB];

  isl_map *NextIterationMap =
      createNextIterationMap(isl_set_get_space(HeaderBBDom), LoopDepth);

  isl_set *UnionBackedgeCondition =
      isl_set_empty(isl_set_get_space(HeaderBBDom));

  SmallVector<llvm::BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {

    // If the latch is only reachable via error statements we skip it.
    isl_set *LatchBBDom = DomainMap.lookup(LatchBB);
    if (!LatchBBDom)
      continue;

    isl_set *BackedgeCondition = nullptr;

    TerminatorInst *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = isl_set_copy(LatchBBDom);
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(*getStmtFor(LatchBB), TI, L, LatchBBDom,
                              ConditionSets)) {
        isl_map_free(NextIterationMap);
        isl_set_free(UnionBackedgeCondition);
        return false;
      }

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = ConditionSets[idx];
    }

    int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition =
        isl_set_project_out(BackedgeCondition, isl_dim_set, LoopDepth + 1,
                            LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition =
        isl_set_union(UnionBackedgeCondition, BackedgeCondition);
  }

  isl_map *ForwardMap = isl_map_lex_le(isl_set_get_space(HeaderBBDom));
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = isl_map_equate(ForwardMap, isl_dim_in, i, isl_dim_out, i);

  isl_set *UnionBackedgeConditionComplement =
      isl_set_complement(UnionBackedgeCondition);
  UnionBackedgeConditionComplement = isl_set_lower_bound_si(
      UnionBackedgeConditionComplement, isl_dim_set, LoopDepth, 0);
  UnionBackedgeConditionComplement =
      isl_set_apply(UnionBackedgeConditionComplement, ForwardMap);
  HeaderBBDom = isl_set_subtract(HeaderBBDom, UnionBackedgeConditionComplement);
  HeaderBBDom = isl_set_apply(HeaderBBDom, NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
  // the bounded assumptions to the context as they are already implied by the
  // <nsw> tag.
  if (Affinator.hasNSWAddRecForLoop(L)) {
    isl_set_free(Parts.first);
    return true;
  }

  isl_set *UnboundedCtx = isl_set_params(Parts.first);
  recordAssumption(INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
  return true;
}
MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
  Value *PointerBase = MA->getOriginalBaseAddr();

  auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
  if (!PointerBaseInst)
    return nullptr;

  auto *BasePtrStmt = getStmtFor(PointerBaseInst);
  if (!BasePtrStmt)
    return nullptr;

  return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
}

bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
                                        __isl_keep isl_union_map *Writes) {
  if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
    auto *NHCtx = getNonHoistableCtx(BasePtrMA, Writes);
    bool Hoistable = NHCtx != nullptr;
    isl_set_free(NHCtx);
    return !Hoistable;
  }

  Value *BaseAddr = MA->getOriginalBaseAddr();
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
    if (!isa<LoadInst>(BasePtrInst))
      return contains(BasePtrInst);

  return false;
}
bool Scop::buildAliasChecks(AliasAnalysis &AA) {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups(AA)) {
    // Aliasing assumptions do not go through addAssumption but we still want to
    // collect statistics so we do it here explicitly.
    if (MinMaxAliasGroups.size())
      AssumptionsAliasing++;
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  invalidate(ALIASING, DebugLoc());

  DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
               << " could not be created as the number of parameters involved "
                  "is too high. The SCoP will be "
                  "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                  "the maximal number of parameters but be advised that the "
                  "compile time might increase exponentially.\n\n");
  return false;
}
std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *this) {

    isl_set *StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
    isl_set_free(StmtDomain);

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl_set *AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl_set *MADomain = getAccessDomain(MA);
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
        isl_set_free(MADomain);
      } else {
        AGDomain = isl_set_union(AGDomain, MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
    isl_set_free(AGDomain);
  }
}
bool Scop::buildAliasGroups(AliasAnalysis &AA) {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute minimal
  //      and maximal accesses to each array of a group in read only and non
  //      read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    bool Valid = buildAliasGroup(AG, HasWriteAccess);
    if (!Valid)
      return false;
  }

  return true;
}
bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  auto &F = getFunction();

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    emitOptimizationRemarkAnalysis(
        F.getContext(), DEBUG_TYPE, F,
        Access->getAccessInstruction()->getDebugLoc(),
        "Possibly aliasing pointer, use restrict keyword.");

    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write pointers,
  // no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
      addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  MinMaxAliasGroups.emplace_back();
  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;

  bool Valid;

  Valid =
      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid =
      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);

  if (!Valid)
    return false;

  return true;
}
/// Get the smallest loop that contains @p S but is not in @p S.
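///
/// For example (sketch): if the SCoP covers only the body of the j-loop in
///
///   for (i = ...)        // loop returned here
///     for (j = ...)      // fully inside the SCoP
///       ...
///
/// the i-loop is returned, since it contains the SCoP but is not contained in
/// it; if no such loop exists, nullptr is returned.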
3261 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3262 // Start with the smallest loop containing the entry and expand that
3263 // loop until it contains all blocks in the region. If there is a loop
3264 // containing all blocks in the region check if it is itself contained
3265 // and if so take the parent loop as it will be the smallest containing
3266 // the region but not contained by it.
3267 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3269 bool AllContained
= true;
3270 for (auto *BB
: S
.blocks())
3271 AllContained
&= L
->contains(BB
);
3274 L
= L
->getParentLoop();
3277 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
           ScopDetection::DetectionContext &DC)
    : SE(&ScalarEvolution), R(R), IsOptimized(false),
      HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
      MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
      IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
      Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
      Schedule(nullptr) {
  if (IslOnErrorAbort)
    isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
}
void Scop::foldSizeConstantsToRight() {
  isl_union_set *Accessed = isl_union_map_range(getAccesses());

  for (auto Array : arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl_space *Space = Array->getSpace();
    Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));

    if (!isl_union_set_contains(Accessed, Space)) {
      isl_space_free(Space);
      continue;
    }

    isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
    isl_map *Transform =
        isl_map_universe(isl_space_map_from_set(Array->getSpace()));

    std::vector<int> Int;

    int Dims = isl_set_dim(Elements, isl_dim_set);
    for (int i = 0; i < Dims; i++) {
      isl_set *DimOnly =
          isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
      DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
      DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);

      isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);

      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
        isl_basic_set_free(DimHull);
        continue;
      }

      if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
        isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
        isl_val *Val = isl_aff_get_denominator_val(Diff);
        isl_aff_free(Diff);

        int ValInt = 1;
        if (isl_val_is_int(Val))
          ValInt = isl_val_get_num_si(Val);
        isl_val_free(Val);

        Int.push_back(ValInt);

        isl_constraint *C = isl_constraint_alloc_equality(
            isl_local_space_from_space(isl_map_get_space(Transform)));
        C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
        C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
        Transform = isl_map_add_constraint(Transform, C);
        isl_basic_set_free(DimHull);
        continue;
      }

      isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
      ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);

      int ValInt = 1;
      if (isl_basic_set_is_equal(ZeroSet, DimHull))
        ValInt = 0;

      Int.push_back(ValInt);
      Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
      isl_basic_set_free(DimHull);
      isl_basic_set_free(ZeroSet);
    }

    isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));

    if (!isl_set_is_subset(Elements, MappedElements)) {
      isl_set_free(Elements);
      isl_set_free(MappedElements);
      isl_map_free(Transform);
      continue;
    }

    isl_set_free(MappedElements);

    bool CanFold = true;
    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold) {
      isl_set_free(Elements);
      isl_map_free(Transform);
      continue;
    }

    for (auto &Access : AccessFunctions)
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(isl_map_apply_range(
            Access->getAccessRelation(), isl_map_copy(Transform)));

    isl_map_free(Transform);

    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);

    isl_set_free(Elements);
  }
  isl_union_set_free(Accessed);
}
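// Illustrative example (added comment, not from the original sources): if a
// two-dimensional array A of size N x M is only ever accessed as
//
//   { Stmt[i, j] -> MemRef_A[3i, j] }
//
// the transform built above rewrites the accesses to
// { Stmt[i, j] -> MemRef_A[i, j] } and multiplies the innermost dimension
// size by 3 (M becomes 3 * M), i.e. the constant stride of an outer dimension
// is folded into the rightmost size. The concrete sets depend on the SCoP at
// hand; this only sketches the intent of foldSizeConstantsToRight().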
void Scop::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
}
Scop::~Scop() {
  isl_set_free(Context);
  isl_set_free(AssumedContext);
  isl_set_free(InvalidContext);
  isl_schedule_free(Schedule);

  for (auto &It : ParameterIds)
    isl_id_free(It.second);

  for (auto It : DomainMap)
    isl_set_free(It.second);

  for (auto &AS : RecordedAssumptions)
    isl_set_free(AS.Set);

  // Free the alias groups.
  for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
      isl_pw_multi_aff_free(MMA.first);
      isl_pw_multi_aff_free(MMA.second);
    }
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
      isl_pw_multi_aff_free(MMA.first);
      isl_pw_multi_aff_free(MMA.second);
    }
  }

  for (const auto &IAClass : InvariantEquivClasses)
    isl_set_free(IAClass.ExecutionContext);

  // Explicitly release all Scop objects and the underlying isl objects before
  // we release the isl context.
  ScopArrayInfoSet.clear();
  ScopArrayInfoMap.clear();
  ScopArrayNameMap.clear();
  AccessFunctions.clear();
}
void Scop::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual)
  // element size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *this)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, *SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
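// Example (added comment, purely illustrative): a base pointer declared as
// i8* that is only ever accessed at byte offsets 0, 4, 8, ... ends up with a
// divisible size of 4 bytes, so the loop above assigns it the virtual element
// type i32 and later passes express all accesses in units of i32 elements.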
void Scop::foldAccessRelations() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}
void Scop::assumeNoOutOfBounds() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->assumeNoOutOfBound();
}
void Scop::simplifySCoP(bool AfterHoisting) {
  for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
    ScopStmt &Stmt = *StmtIt;

    bool RemoveStmt = Stmt.isEmpty();
    if (!RemoveStmt)
      RemoveStmt = !DomainMap[Stmt.getEntryBlock()];

    // Remove read-only statements only after invariant load hoisting.
    if (!RemoveStmt && AfterHoisting) {
      bool OnlyRead = true;
      for (MemoryAccess *MA : Stmt) {
        if (MA->isRead())
          continue;

        OnlyRead = false;
        break;
      }

      RemoveStmt = OnlyRead;
    }

    if (!RemoveStmt) {
      StmtIt++;
      continue;
    }

    // Remove the statement because it is unnecessary.
    if (Stmt.isRegionStmt())
      for (BasicBlock *BB : Stmt.getRegion()->blocks())
        StmtMap.erase(BB);
    else
      StmtMap.erase(Stmt.getBasicBlock());

    StmtIt = Stmts.erase(StmtIt);
  }
}
InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
  if (!LInst)
    return nullptr;

  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
    LInst = cast<LoadInst>(Rep);

  Type *Ty = LInst->getType();
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
  for (auto &IAClass : InvariantEquivClasses) {
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
      continue;

    auto &MAs = IAClass.InvariantAccesses;
    for (auto *MA : MAs)
      if (MA->getAccessInstruction() == Val)
        return &IAClass;
  }

  return nullptr;
}
/// Check if @p MA can always be hoisted without execution context.
static bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                               bool MAInvalidCtxIsEmpty,
                               bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
                                          LInst->getAlignment(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if it
  // does not involve any parameters that might have been specialized by the
  // statement domain.
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
      return false;
  return true;
}
void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  auto *StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = isl_set_is_empty(StmtInvalidCtx);

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl_set *DomainCtx = isl_set_params(Stmt.getDomain());
  DomainCtx = isl_set_subtract(DomainCtx, StmtInvalidCtx);

  if (isl_set_n_basic_set(DomainCtx) >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    invalidate(COMPLEXITY, AccInst->getDebugLoc());
    isl_set_free(DomainCtx);
    for (auto &InvMA : InvMAs)
      isl_set_free(InvMA.NonHoistableCtx);
    return;
  }

  // Project out all parameters that relate to loads in the statement.
  // Otherwise we could have cyclic dependences on the constraints under which
  // the hoisted loads are executed and we could not determine an order in
  // which to pre-load them. This happens because not only lower bounds are
  // part of the domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE->isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : Parameters) {
        Values.clear();
        findValues(Parameter, *SE, Values);
        if (!Values.count(AccInst))
          continue;

        if (isl_id *ParamId = getIdForParam(Parameter)) {
          int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId);
          if (Dim >= 0)
            DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1);
          isl_id_free(ParamId);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    auto *NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    isl_set *MACtx;
    auto *MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = isl_set_is_empty(NHCtx);
    bool MAInvalidCtxIsEmpty = isl_set_is_empty(MAInvalidCtx);

    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl_set_universe(isl_set_get_space(DomainCtx));
      isl_set_free(MAInvalidCtx);
      isl_set_free(NHCtx);
    } else {
      MACtx = isl_set_copy(DomainCtx);
      MACtx = isl_set_subtract(MACtx, isl_set_union(MAInvalidCtx, NHCtx));
      MACtx = isl_set_gist_params(MACtx, getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : InvariantEquivClasses) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct parts of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;

      auto *LastMA = MAs.front();

      auto *AR = isl_map_range(MA->getAccessRelation());
      auto *LastAR = isl_map_range(LastMA->getAccessRelation());
      bool SameAR = isl_set_is_equal(AR, LastAR);
      isl_set_free(AR);
      isl_set_free(LastAR);

      if (!SameAR)
        continue;

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl_set *&IAClassDomainCtx = IAClass.ExecutionContext;
      if (IAClassDomainCtx)
        IAClassDomainCtx =
            isl_set_coalesce(isl_set_union(IAClassDomainCtx, MACtx));
      else
        IAClassDomainCtx = MACtx;
      break;
    }

    if (Consolidated)
      continue;

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }

  isl_set_free(DomainCtx);
}
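// Illustrative example (added comment): two loads of the same pointer %p of
// type i32 located in statements with domains
//   { Stmt_A[i] : 0 <= i < n }   and   { Stmt_B[i] : 0 <= i < m }
// end up in one equivalence class whose ExecutionContext is, roughly,
//   [n, m] -> { : n > 0 or m > 0 }
// i.e. the union of the parameter contexts under which either statement
// executes; the actual context additionally subtracts the invalid and
// non-hoistable parts computed above.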
__isl_give isl_set *Scop::getNonHoistableCtx(MemoryAccess *Access,
                                             __isl_keep isl_union_map *Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our code
  //       generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return nullptr;

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return nullptr;

  isl_map *AccessRelation = Access->getAccessRelation();
  assert(!isl_map_is_empty(AccessRelation));

  if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0,
                            Stmt.getNumIterators())) {
    isl_map_free(AccessRelation);
    return nullptr;
  }

  AccessRelation = isl_map_intersect_domain(AccessRelation, Stmt.getDomain());
  isl_set *SafeToLoad;

  auto &DL = getFunction().getParent()->getDataLayout();
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
                                  DL)) {
    SafeToLoad =
        isl_set_universe(isl_space_range(isl_map_get_space(AccessRelation)));
    isl_map_free(AccessRelation);
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    isl_map_free(AccessRelation);
    return nullptr;
  } else {
    SafeToLoad = isl_map_range(AccessRelation);
  }

  isl_union_map *Written = isl_union_map_intersect_range(
      isl_union_map_copy(Writes), isl_union_set_from_set(SafeToLoad));
  auto *WrittenCtx = isl_union_map_params(Written);
  bool IsWritten = !isl_set_is_empty(WrittenCtx);

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = isl_set_remove_divs(WrittenCtx);
  bool TooComplex = isl_set_n_basic_set(WrittenCtx) >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI)) {
    isl_set_free(WrittenCtx);
    return nullptr;
  }

  addAssumption(INVARIANTLOAD, isl_set_copy(WrittenCtx), LI->getDebugLoc(),
                AS_RESTRICTION);
  return WrittenCtx;
}
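// Illustrative example (added comment): for a load of %p in a statement with
// domain { Stmt[i] : 0 <= i < n }, and a possible store to the same location
// that is only executed when a parameter m exceeds 100, the returned
// non-hoistable context is roughly [m] -> { : m > 100 }. The load is then
// only preloaded under the complement of this context. The concrete sets
// depend on the access relations and the write map of the SCoP.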
void Scop::verifyInvariantLoads() {
  auto &RIL = getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && contains(LI));
    ScopStmt *Stmt = getStmtFor(LI);
    if (Stmt && Stmt->getArrayAccessOrNULLFor(LI)) {
      invalidate(INVARIANTLOAD, LI->getDebugLoc());
      return;
    }
  }
}
void Scop::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl_union_map *Writes = getWrites();
  for (ScopStmt &Stmt : *this) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt)
      if (auto *NHCtx = getNonHoistableCtx(Access, Writes))
        InvariantAccesses.push_back({Access, NHCtx});

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
  isl_union_map_free(Writes);
}
/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// memory access.
static const ScopArrayInfo *findCanonicalArray(Scop *S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}
/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : *S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl_id *Id = New->getBasePtrId();
      isl_map *Map = Access->getAccessRelation();
      Map = isl_map_set_tuple_id(Map, isl_dim_out, Id);
      Access->setAccessRelation(Map);
    }
}
void Scop::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(this, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
        continue;

      replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
const ScopArrayInfo *
Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
                               ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                               const char *BaseName) {
  assert((BasePtr || BaseName) &&
         "BasePtr and BaseName can not be nullptr at the same time.");
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
                      : ScopArrayNameMap[BaseName];
  if (!SAI) {
    auto &DL = getFunction().getParent()->getDataLayout();
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
                                DL, this, BaseName));
    ScopArrayInfoSet.insert(SAI.get());
  } else {
    SAI->updateElementType(ElementType);
    // In case of mismatching array sizes, we bail out by setting the run-time
    // context to false.
    if (!SAI->updateSizes(Sizes))
      invalidate(DELINEARIZATION, DebugLoc());
  }
  return SAI.get();
}
const ScopArrayInfo *
Scop::createScopArrayInfo(Type *ElementType, const std::string &BaseName,
                          const std::vector<unsigned> &Sizes) {
  auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
  std::vector<const SCEV *> SCEVSizes;

  for (auto size : Sizes)
    if (size)
      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
    else
      SCEVSizes.push_back(nullptr);

  auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
                                       MemoryKind::Array, BaseName.c_str());
  return SAI;
}
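// Usage sketch (added comment, hypothetical caller): a transformation that
// needs a fresh temporary array, e.g. for packing a tile of a matrix kernel,
// could request a 32x32 array of floats via
//
//   const ScopArrayInfo *Packed =
//       S.createScopArrayInfo(Type::getFloatTy(Ctx), "Packed_A", {32, 32});
//
// The dimension sizes are turned into constant SCEVs above; a size of 0 is
// passed through as a nullptr and denotes an unsized (outermost) dimension.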
const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
                                                  MemoryKind Kind) {
  auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
  return SAI;
}

const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
  auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
  assert(SAI && "No ScopArrayInfo available for this base pointer");
  return SAI;
}
std::string Scop::getContextStr() const { return stringFromIslObj(Context); }

std::string Scop::getAssumedContextStr() const {
  assert(AssumedContext && "Assumed context not yet built");
  return stringFromIslObj(AssumedContext);
}

std::string Scop::getInvalidContextStr() const {
  return stringFromIslObj(InvalidContext);
}

std::string Scop::getNameStr() const {
  std::string ExitName, EntryName;
  raw_string_ostream ExitStr(ExitName);
  raw_string_ostream EntryStr(EntryName);

  R.getEntry()->printAsOperand(EntryStr, false);
  EntryStr.flush();

  if (R.getExit()) {
    R.getExit()->printAsOperand(ExitStr, false);
    ExitStr.flush();
  } else
    ExitName = "FunctionExit";

  return EntryName + "---" + ExitName;
}
*Scop::getContext() const { return isl_set_copy(Context
); }
3981 __isl_give isl_space
*Scop::getParamSpace() const {
3982 return isl_set_get_space(Context
);
3985 __isl_give isl_set
*Scop::getAssumedContext() const {
3986 assert(AssumedContext
&& "Assumed context not yet built");
3987 return isl_set_copy(AssumedContext
);
3990 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
3991 if (PollyProcessUnprofitable
)
3997 unsigned OptimizableStmtsOrLoops
= 0;
3998 for (auto &Stmt
: *this) {
3999 if (Stmt
.getNumIterators() == 0)
4002 bool ContainsArrayAccs
= false;
4003 bool ContainsScalarAccs
= false;
4004 for (auto *MA
: Stmt
) {
4007 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4008 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4011 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4012 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4015 return OptimizableStmtsOrLoops
> 1;
bool Scop::hasFeasibleRuntimeContext() const {
  auto *PositiveContext = getAssumedContext();
  auto *NegativeContext = getInvalidContext();
  PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
  bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
                      isl_set_is_subset(PositiveContext, NegativeContext));
  isl_set_free(PositiveContext);
  if (!IsFeasible) {
    isl_set_free(NegativeContext);
    return false;
  }

  auto *DomainContext = isl_union_set_params(getDomains());
  IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
  IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
  isl_set_free(NegativeContext);
  isl_set_free(DomainContext);

  return IsFeasible;
}
static std::string toString(AssumptionKind Kind) {
  switch (Kind) {
  case ALIASING:
    return "No-aliasing";
  case INBOUNDS:
    return "Inbounds";
  case WRAPPING:
    return "No-overflows";
  case UNSIGNED:
    return "Signed-unsigned";
  case COMPLEXITY:
    return "Low complexity";
  case PROFITABLE:
    return "Profitable";
  case ERRORBLOCK:
    return "No-error";
  case INFINITELOOP:
    return "Finite loop";
  case INVARIANTLOAD:
    return "Invariant load";
  case DELINEARIZATION:
    return "Delinearization";
  }
  llvm_unreachable("Unknown AssumptionKind!");
}
bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
  if (Sign == AS_ASSUMPTION) {
    if (isl_set_is_subset(Context, Set))
      return false;

    if (isl_set_is_subset(AssumedContext, Set))
      return false;
  } else {
    if (isl_set_is_disjoint(Set, Context))
      return false;

    if (isl_set_is_subset(Set, InvalidContext))
      return false;
  }
  return true;
}
bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
                           DebugLoc Loc, AssumptionSign Sign) {
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
    return false;

  // Never emit trivial assumptions as they only clutter the output.
  if (!PollyRemarksMinimal) {
    isl_set *Univ = nullptr;
    if (Sign == AS_ASSUMPTION)
      Univ = isl_set_universe(isl_set_get_space(Set));

    bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
                     (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
    isl_set_free(Univ);

    if (IsTrivial)
      return false;
  }

  switch (Kind) {
  case ALIASING:
    AssumptionsAliasing++;
    break;
  case INBOUNDS:
    AssumptionsInbounds++;
    break;
  case WRAPPING:
    AssumptionsWrapping++;
    break;
  case UNSIGNED:
    AssumptionsUnsigned++;
    break;
  case COMPLEXITY:
    AssumptionsComplexity++;
    break;
  case PROFITABLE:
    AssumptionsUnprofitable++;
    break;
  case ERRORBLOCK:
    AssumptionsErrorBlock++;
    break;
  case INFINITELOOP:
    AssumptionsInfiniteLoop++;
    break;
  case INVARIANTLOAD:
    AssumptionsInvariantLoad++;
    break;
  case DELINEARIZATION:
    AssumptionsDelinearization++;
    break;
  }

  auto &F = getFunction();
  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
  std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
  emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F, Loc, Msg);
  return true;
}
void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                         DebugLoc Loc, AssumptionSign Sign) {
  // Simplify the assumptions/restrictions first.
  Set = isl_set_gist_params(Set, getContext());

  if (!trackAssumption(Kind, Set, Loc, Sign)) {
    isl_set_free(Set);
    return;
  }

  if (Sign == AS_ASSUMPTION) {
    AssumedContext = isl_set_intersect(AssumedContext, Set);
    AssumedContext = isl_set_coalesce(AssumedContext);
  } else {
    InvalidContext = isl_set_union(InvalidContext, Set);
    InvalidContext = isl_set_coalesce(InvalidContext);
  }
}
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  assert((isl_set_is_params(Set) || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    const Assumption &AS = RecordedAssumptions.pop_back_val();

    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB);
    if (!Dom) {
      isl_set_free(AS.Set);
      continue;
    }

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S  <==>  ¬Dom v S  <==>  ¬(Dom - S)
    //
    // To avoid the complement we will register (Dom - S) as a restriction,
    // not as an assumption.
    isl_set *S = AS.Set;
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION);
  }
}
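// Worked example (added comment): for a block domain
// Dom = { [i] : 0 <= i < n } and a recorded assumption S = { [i] : i < 100 },
// the code above registers the restriction
// Dom - S = { [i] : 0 <= i < n and i >= 100 }; projected onto the parameters
// this is [n] -> { : n > 100 }, i.e. the run-time check has to rule out
// n > 100 rather than prove the implication Dom => S directly.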
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc) {
  addAssumption(Kind, isl_set_empty(getParamSpace()), Loc, AS_ASSUMPTION);
}

__isl_give isl_set *Scop::getInvalidContext() const {
  return isl_set_copy(InvalidContext);
}
void Scop::printContext(raw_ostream &OS) const {
  OS << "Context:\n";
  OS.indent(4) << Context << "\n";

  OS.indent(4) << "Assumed Context:\n";
  OS.indent(4) << AssumedContext << "\n";

  OS.indent(4) << "Invalid Context:\n";
  OS.indent(4) << InvalidContext << "\n";

  unsigned Dim = 0;
  for (const SCEV *Parameter : Parameters)
    OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
}
void Scop::printAliasAssumptions(raw_ostream &OS) const {
  int noOfGroups = 0;
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    if (Pair.second.size() == 0)
      noOfGroups += 1;
    else
      noOfGroups += Pair.second.size();
  }

  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
  if (MinMaxAliasGroups.empty()) {
    OS.indent(8) << "n/a\n";
    return;
  }

  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {

    // If the group has no read-only accesses print the write accesses.
    if (Pair.second.empty()) {
      OS.indent(8) << "[[";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }

    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
      OS.indent(8) << "[[";
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }
  }
}
void Scop::printStatements(raw_ostream &OS) const {
  OS << "Statements {\n";

  for (const ScopStmt &Stmt : *this)
    OS.indent(4) << Stmt;

  OS.indent(4) << "}\n";
}

void Scop::printArrayInfo(raw_ostream &OS) const {
  OS << "Arrays {\n";

  for (auto &Array : arrays())
    Array->print(OS);

  OS.indent(4) << "}\n";

  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";

  for (auto &Array : arrays())
    Array->print(OS, /* SizeAsPwAff */ true);

  OS.indent(4) << "}\n";
}
void Scop::print(raw_ostream &OS) const {
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
  OS.indent(4) << "Region: " << getNameStr() << "\n";
  OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
  OS.indent(4) << "Invariant Accesses: {\n";
  for (const auto &IAClass : InvariantEquivClasses) {
    const auto &MAs = IAClass.InvariantAccesses;
    if (MAs.empty()) {
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
    } else {
      MAs.front()->print(OS);
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
                    << "\n";
    }
  }
  OS.indent(4) << "}\n";
  printContext(OS.indent(4));
  printArrayInfo(OS.indent(4));
  printAliasAssumptions(OS);
  printStatements(OS.indent(4));
}

void Scop::dump() const { print(dbgs()); }
isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
                                 bool NonNegative) {
  // First try to use the SCEVAffinator to generate a piecewise defined
  // affine function from @p E in the context of @p BB. If that task becomes
  // too complex the affinator might return a nullptr. In such a case we
  // invalidate the SCoP and return a dummy value. This way we do not need to
  // add error handling code to all users of this function.
  auto PWAC = Affinator.getPwAff(E, BB);
  if (PWAC.first) {
    // TODO: We could use a heuristic and either use:
    //         SCEVAffinator::takeNonNegativeAssumption
    //       or
    //         SCEVAffinator::interpretAsUnsigned
    //       to deal with unsigned or "NonNegative" SCEVs.
    if (NonNegative)
      Affinator.takeNonNegativeAssumption(PWAC);
    return PWAC;
  }

  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
  invalidate(COMPLEXITY, DL);
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
__isl_give isl_union_set *Scop::getDomains() const {
  isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
  isl_union_set *Domain = isl_union_set_empty(EmptySpace);

  for (const ScopStmt &Stmt : *this)
    Domain = isl_union_set_add_set(Domain, Stmt.getDomain());

  return Domain;
}
__isl_give isl_pw_aff *Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
  PWACtx PWAC = getPwAff(E, BB);
  isl_set_free(PWAC.second);
  return PWAC.first;
}
__isl_give isl_union_map *
Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
  isl_union_map *Accesses = isl_union_map_empty(getParamSpace());

  for (ScopStmt &Stmt : *this) {
    for (MemoryAccess *MA : Stmt) {
      if (!Predicate(*MA))
        continue;

      isl_set *Domain = Stmt.getDomain();
      isl_map *AccessDomain = MA->getAccessRelation();
      AccessDomain = isl_map_intersect_domain(AccessDomain, Domain);
      Accesses = isl_union_map_add_map(Accesses, AccessDomain);
    }
  }
  return isl_union_map_coalesce(Accesses);
}
__isl_give isl_union_map *Scop::getMustWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
}

__isl_give isl_union_map *Scop::getMayWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
}

__isl_give isl_union_map *Scop::getWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
}

__isl_give isl_union_map *Scop::getReads() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
}

__isl_give isl_union_map *Scop::getAccesses() {
  return getAccessesOfType([](MemoryAccess &MA) { return true; });
}
// Check whether @p Node is an extension node.
//
// @return isl_bool_error if @p Node is an extension node, so that the
//         traversal is aborted early, and isl_bool_true otherwise.
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
    return isl_bool_error;
  else
    return isl_bool_true;
}

bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
  return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
                                                     nullptr) == isl_stat_error;
}
__isl_give isl_union_map *Scop::getSchedule() const {
  auto *Tree = getScheduleTree();
  if (containsExtensionNode(Tree)) {
    isl_schedule_free(Tree);
    return nullptr;
  }
  auto *S = isl_schedule_get_map(Tree);
  isl_schedule_free(Tree);
  return S;
}

__isl_give isl_schedule *Scop::getScheduleTree() const {
  return isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
                                       getDomains());
}

void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
  auto *S = isl_schedule_from_domain(getDomains());
  S = isl_schedule_insert_partial_schedule(
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
  isl_schedule_free(Schedule);
  Schedule = S;
}

void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
  isl_schedule_free(Schedule);
  Schedule = NewSchedule;
}
bool Scop::restrictDomains(__isl_take isl_union_set *Domain) {
  bool Changed = false;
  for (ScopStmt &Stmt : *this) {
    isl_union_set *StmtDomain = isl_union_set_from_set(Stmt.getDomain());
    isl_union_set *NewStmtDomain = isl_union_set_intersect(
        isl_union_set_copy(StmtDomain), isl_union_set_copy(Domain));

    if (isl_union_set_is_subset(StmtDomain, NewStmtDomain)) {
      isl_union_set_free(StmtDomain);
      isl_union_set_free(NewStmtDomain);
      continue;
    }

    Changed = true;

    isl_union_set_free(StmtDomain);
    NewStmtDomain = isl_union_set_coalesce(NewStmtDomain);

    if (isl_union_set_is_empty(NewStmtDomain)) {
      Stmt.restrictDomain(isl_set_empty(Stmt.getDomainSpace()));
      isl_union_set_free(NewStmtDomain);
    } else
      Stmt.restrictDomain(isl_set_from_union_set(NewStmtDomain));
  }
  isl_union_set_free(Domain);
  return Changed;
}
ScalarEvolution *Scop::getSE() const { return SE; }

struct MapToDimensionDataTy {
  int N;
  isl_union_pw_multi_aff *Res;
};

// Create a function that maps the elements of 'Set' to its N-th dimension and
// add it to User->Res.
//
// @param Set       The input set.
// @param User->N   The dimension to map to.
// @param User->Res The isl_union_pw_multi_aff to which to add the result.
//
// @returns isl_stat_ok if no error occurred, otherwise isl_stat_error.
static isl_stat mapToDimension_AddSet(__isl_take isl_set *Set, void *User) {
  struct MapToDimensionDataTy *Data = (struct MapToDimensionDataTy *)User;
  int Dim;
  isl_space *Space;
  isl_pw_multi_aff *PMA;

  Dim = isl_set_dim(Set, isl_dim_set);
  Space = isl_set_get_space(Set);
  PMA = isl_pw_multi_aff_project_out_map(Space, isl_dim_set, Data->N,
                                         Dim - Data->N);
  if (Data->N > 1)
    PMA = isl_pw_multi_aff_drop_dims(PMA, isl_dim_out, 0, Data->N - 1);
  Data->Res = isl_union_pw_multi_aff_add_pw_multi_aff(Data->Res, PMA);

  isl_set_free(Set);
  return isl_stat_ok;
}
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//            Domain: { A[i,j]; B[i,j,k] }
//                 N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet A union set describing the elements for which to generate a
//             mapping.
// @param N    The dimension to map to.
// @returns    A mapping from USet to its N-th dimension.
static __isl_give isl_multi_union_pw_aff *
mapToDimension(__isl_take isl_union_set *USet, int N) {
  assert(!isl_union_set_is_empty(USet));

  struct MapToDimensionDataTy Data;

  auto *Space = isl_union_set_get_space(USet);
  auto *PwAff = isl_union_pw_multi_aff_empty(Space);

  Data = {N, PwAff};

  auto Res = isl_union_set_foreach_set(USet, &mapToDimension_AddSet, &Data);
  assert(Res == isl_stat_ok);

  isl_union_set_free(USet);
  return isl_multi_union_pw_aff_from_union_pw_multi_aff(Data.Res);
}
void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop) {
  assert(BB && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *BB, SurroundingLoop);
  auto *Stmt = &Stmts.back();
  StmtMap[BB] = Stmt;
}

void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
  assert(R && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *R, SurroundingLoop);
  auto *Stmt = &Stmts.back();
  for (BasicBlock *BB : R->blocks())
    StmtMap[BB] = Stmt;
}

ScopStmt *Scop::addScopStmt(__isl_take isl_map *SourceRel,
                            __isl_take isl_map *TargetRel,
                            __isl_take isl_set *Domain) {
  isl_set *SourceDomain = isl_map_domain(isl_map_copy(SourceRel));
  isl_set *TargetDomain = isl_map_domain(isl_map_copy(TargetRel));
  assert(isl_set_is_subset(Domain, TargetDomain) &&
         "Target access not defined for complete statement domain");
  assert(isl_set_is_subset(Domain, SourceDomain) &&
         "Source access not defined for complete statement domain");
  isl_set_free(SourceDomain);
  isl_set_free(TargetDomain);

  Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
  return &(Stmts.back());
}
void Scop::buildSchedule(LoopInfo &LI) {
  Loop *L = getLoopSurroundingScop(*this, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(getRegion().getNode(), LoopStack, LI);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  Schedule = LoopStack[0].Schedule;
}
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all nodes within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.size() == 0) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}
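// Illustrative example (added comment): for a region with blocks
//   entry -> loop1.header -> loop1.body -> loop1.exit -> next
// the reverse post-order may yield loop1.exit before loop1.body. Because
// loop1.exit lies outside the loop currently on top of the LoopStack, it is
// pushed onto the DelayList and only scheduled after loop1.body has been
// processed, so the statements of loop1 end up in one contiguous sequence
// node of the schedule tree.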
void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack, LI);
      return;
    }
  }

  auto &LoopData = LoopStack.back();
  LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  if (auto *Stmt = getStmtFor(RN)) {
    auto *UDomain = isl_union_set_from_set(Stmt->getDomain());
    auto *StmtSchedule = isl_schedule_from_domain(UDomain);
    LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  while (LoopData.L &&
         LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
    auto *Schedule = LoopData.Schedule;
    auto NumBlocksProcessed = LoopData.NumBlocksProcessed;

    LoopStack.pop_back();
    auto &NextLoopData = LoopStack.back();

    if (Schedule) {
      auto *Domain = isl_schedule_get_domain(Schedule);
      auto *MUPA = mapToDimension(Domain, LoopStack.size());
      Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA);
      NextLoopData.Schedule =
          combineInSequence(NextLoopData.Schedule, Schedule);
    }

    NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
    LoopData = NextLoopData;
  }
}
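// Illustrative example (added comment): for the nest
//
//   for (i = 0; i < n; i++)
//     for (j = 0; j < m; j++)
//       Stmt(i, j);
//
// the inner loop is finalized first and contributes the partial schedule
// { Stmt[i, j] -> [j] }; when the outer loop is finalized, mapToDimension
// inserts { Stmt[i, j] -> [i] } above it, so the resulting schedule tree
// corresponds to the relation { Stmt[i, j] -> [i, j] }. This only sketches
// the intended result; the actual tree also contains domain and sequence
// nodes.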
ScopStmt *Scop::getStmtFor(BasicBlock *BB) const {
  auto StmtMapIt = StmtMap.find(BB);
  if (StmtMapIt == StmtMap.end())
    return nullptr;
  return StmtMapIt->second;
}

ScopStmt *Scop::getStmtFor(RegionNode *RN) const {
  if (RN->isSubRegion())
    return getStmtFor(RN->getNodeAs<Region>());
  return getStmtFor(RN->getNodeAs<BasicBlock>());
}

ScopStmt *Scop::getStmtFor(Region *R) const {
  ScopStmt *Stmt = getStmtFor(R->getEntry());
  assert(!Stmt || Stmt->getRegion() == R);
  return Stmt;
}
int Scop::getRelativeLoopDepth(const Loop *L) const {
  Loop *OuterLoop =
      L ? R.outermostLoopInRegion(const_cast<Loop *>(L)) : nullptr;
  if (!OuterLoop)
    return -1;
  return L->getLoopDepth() - OuterLoop->getLoopDepth();
}

ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
  for (auto &SAI : arrays()) {
    if (SAI->getName() == BaseName)
      return SAI;
  }
  return nullptr;
}
//===----------------------------------------------------------------------===//
void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.setPreservesAll();
}
void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
  NumLoopsInScop += Stats.NumLoops;
  MaxNumLoopsInScop =
      std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);

  if (Stats.MaxDepth == 1)
    NumScopsDepthOne++;
  else if (Stats.MaxDepth == 2)
    NumScopsDepthTwo++;
  else if (Stats.MaxDepth == 3)
    NumScopsDepthThree++;
  else if (Stats.MaxDepth == 4)
    NumScopsDepthFour++;
  else if (Stats.MaxDepth == 5)
    NumScopsDepthFive++;
  else
    NumScopsDepthLarger++;
}
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
  S = SB.getScop(); // take ownership of scop object

  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats);
  }

  return false;
}
void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)
//===----------------------------------------------------------------------===//
void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.setPreservesAll();
}
bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  /// Create a polyhedral description of scops for all the valid regions of a
  /// function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
    std::unique_ptr<Scop> S = SB.getScop();
    if (!S)
      continue;
    bool Inserted =
        RegionToScopMap.insert(std::make_pair(R, std::move(S))).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
  return false;
}
void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : RegionToScopMap) {
    if (It.second)
      It.second->print(OS);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)