1 //===- ScopInfo.cpp -------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Create a polyhedral description for a static control flow region.
11 // The pass creates a polyhedral description of the Scops detected by the Scop
12 // detection derived from their LLVM-IR code.
14 // This representation is shared among several tools in the polyhedral
15 // community, which are e.g. Cloog, Pluto, Loopo, Graphite.
17 //===----------------------------------------------------------------------===//
19 #include "polly/ScopInfo.h"
20 #include "polly/LinkAllPasses.h"
21 #include "polly/Options.h"
22 #include "polly/ScopBuilder.h"
23 #include "polly/ScopDetection.h"
24 #include "polly/Support/GICHelper.h"
25 #include "polly/Support/ISLOStream.h"
26 #include "polly/Support/ISLTools.h"
27 #include "polly/Support/SCEVAffinator.h"
28 #include "polly/Support/SCEVValidator.h"
29 #include "polly/Support/ScopHelper.h"
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/PostOrderIterator.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SetVector.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 #include "llvm/ADT/StringExtras.h"
42 #include "llvm/ADT/StringMap.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/Loads.h"
47 #include "llvm/Analysis/LoopInfo.h"
48 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
49 #include "llvm/Analysis/RegionInfo.h"
50 #include "llvm/Analysis/RegionIterator.h"
51 #include "llvm/Analysis/ScalarEvolution.h"
52 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
53 #include "llvm/IR/Argument.h"
54 #include "llvm/IR/BasicBlock.h"
55 #include "llvm/IR/CFG.h"
56 #include "llvm/IR/ConstantRange.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/DebugLoc.h"
60 #include "llvm/IR/DerivedTypes.h"
61 #include "llvm/IR/DiagnosticInfo.h"
62 #include "llvm/IR/Dominators.h"
63 #include "llvm/IR/Function.h"
64 #include "llvm/IR/InstrTypes.h"
65 #include "llvm/IR/Instruction.h"
66 #include "llvm/IR/Instructions.h"
67 #include "llvm/IR/IntrinsicInst.h"
68 #include "llvm/IR/Module.h"
69 #include "llvm/IR/PassManager.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/Pass.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
79 #include "llvm/Support/ErrorHandling.h"
80 #include "llvm/Support/MathExtras.h"
81 #include "llvm/Support/raw_ostream.h"
83 #include "isl/constraint.h"
84 #include "isl/local_space.h"
86 #include "isl/options.h"
87 #include "isl/printer.h"
88 #include "isl/schedule.h"
89 #include "isl/schedule_node.h"
91 #include "isl/union_map.h"
92 #include "isl/union_set.h"
106 using namespace llvm
;
107 using namespace polly
;
109 #define DEBUG_TYPE "polly-scops"
111 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
112 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
113 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
114 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
115 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
116 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
117 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
118 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
119 STATISTIC(AssumptionsInvariantLoad
,
120 "Number of invariant loads assumptions taken.");
121 STATISTIC(AssumptionsDelinearization
,
122 "Number of delinearization assumptions taken.");
124 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
125 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
126 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
127 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
129 STATISTIC(NumScopsDepthZero
, "Number of scops with maximal loop depth 0");
130 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
131 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
132 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
133 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
134 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
135 STATISTIC(NumScopsDepthLarger
,
136 "Number of scops with maximal loop depth 6 and larger");
137 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
139 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
141 NumValueWritesInLoops
,
142 "Number of scalar value writes nested in affine loops after ScopInfo");
143 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
144 STATISTIC(NumPHIWritesInLoops
,
145 "Number of scalar phi writes nested in affine loops after ScopInfo");
146 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
147 STATISTIC(NumSingletonWritesInLoops
,
148 "Number of singleton writes nested in affine loops after ScopInfo");
150 // The maximal number of basic sets we allow during domain construction to
151 // be created. More complex scops will result in very high compile time and
152 // are also unlikely to result in good code.
153 static int const MaxDisjunctsInDomain
= 20;
155 // The number of disjuncts in the context after which we stop to add more
156 // disjuncts. This parameter is there to avoid exponential growth in the
157 // number of disjuncts when adding non-convex sets to the context.
158 static int const MaxDisjunctsInContext
= 4;
160 // The maximal number of dimensions we allow during invariant load construction.
161 // More complex access ranges will result in very high compile time and are also
162 // unlikely to result in good code. This value is very high and should only
163 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
164 static int const MaxDimensionsInAccessRange
= 9;
167 OptComputeOut("polly-analysis-computeout",
168 cl::desc("Bound the scop analysis by a maximal amount of "
169 "computational steps (0 means no bound)"),
170 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
171 cl::cat(PollyCategory
));
173 static cl::opt
<bool> PollyRemarksMinimal(
174 "polly-remarks-minimal",
175 cl::desc("Do not emit remarks about assumptions that are known"),
176 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
178 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
179 "polly-rtc-max-array-disjuncts",
180 cl::desc("The maximal number of disjunts allowed in memory accesses to "
182 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
184 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
185 "polly-rtc-max-parameters",
186 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
187 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
189 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
190 "polly-rtc-max-arrays-per-group",
191 cl::desc("The maximal number of arrays to compare in each alias group."),
192 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
194 static cl::opt
<std::string
> UserContextStr(
195 "polly-context", cl::value_desc("isl parameter set"),
196 cl::desc("Provide additional constraints on the context parameters"),
197 cl::init(""), cl::cat(PollyCategory
));
200 IslOnErrorAbort("polly-on-isl-error-abort",
201 cl::desc("Abort if an isl error is encountered"),
202 cl::init(true), cl::cat(PollyCategory
));
204 static cl::opt
<bool> PollyPreciseInbounds(
205 "polly-precise-inbounds",
206 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
207 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
210 PollyIgnoreInbounds("polly-ignore-inbounds",
211 cl::desc("Do not take inbounds assumptions at all"),
212 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
214 static cl::opt
<bool> PollyIgnoreParamBounds(
215 "polly-ignore-parameter-bounds",
217 "Do not add parameter bounds and do no gist simplify sets accordingly"),
218 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
220 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
221 "polly-allow-dereference-of-all-function-parameters",
223 "Treat all parameters to functions that are pointers as dereferencible."
224 " This is useful for invariant load hoisting, since we can generate"
225 " less runtime checks. This is only valid if all pointers to functions"
226 " are always initialized, so that Polly can choose to hoist"
228 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
230 static cl::opt
<bool> PollyPreciseFoldAccesses(
231 "polly-precise-fold-accesses",
232 cl::desc("Fold memory accesses to model more possible delinearizations "
233 "(does not scale well)"),
234 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
236 bool polly::UseInstructionNames
;
238 static cl::opt
<bool, true> XUseInstructionNames(
239 "polly-use-llvm-names",
240 cl::desc("Use LLVM-IR names when deriving statement names"),
241 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
242 cl::ZeroOrMore
, cl::cat(PollyCategory
));
244 static cl::opt
<bool> PollyPrintInstructions(
245 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
246 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
248 //===----------------------------------------------------------------------===//
250 // Create a sequence of two schedules. Either argument may be null and is
251 // interpreted as the empty schedule. Can also return null if both schedules are
/// Concatenate the schedules @p Prev and @p Succ into one sequence schedule.
/// NOTE(review): the embedded original numbering jumps from 253 to 259, so
/// the null-schedule handling described in the comment above this function
/// is not visible in this view -- confirm against the full source.
253 static isl::schedule
combineInSequence(isl::schedule Prev
, isl::schedule Succ
) {
259 return Prev
.sequence(Succ
);
/// Add the signed bounds of @p Range to dimension @p dim of kind @p type in
/// the set @p S.
/// NOTE(review): the embedded original numbering jumps several times here
/// (269, 274, 276-277, 279-280, 286, 288, 290-294 missing), so the
/// declaration of V, the early-return bodies and the final statements are
/// not visible in this view -- confirm against the full source.
262 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
263 int dim
, isl::dim type
) {
265 isl::ctx Ctx
= S
.get_ctx();
267 // The upper and lower bound for a parameter value is derived either from
268 // the data type of the parameter or from the - possibly more restrictive -
// Clamp the dimension to the signed minimum/maximum of the range.
270 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
271 S
= S
.lower_bound_val(type
, dim
, V
);
272 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
273 S
= S
.upper_bound_val(type
, dim
, V
);
// A full range carries no information beyond the signed min/max bounds.
275 if (Range
.isFullSet())
// Bail out before the context grows too many disjuncts.
278 if (S
.n_basic_set() > MaxDisjunctsInContext
)
281 // In case of signed wrapping, we can refine the set of valid values by
282 // excluding the part not covered by the wrapping range.
283 if (Range
.isSignWrappedSet()) {
284 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
285 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
287 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
289 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
/// Identify the ScopArrayInfo that a loaded base pointer originates from,
/// i.e. the array accessed by the load that produces @p BasePtr.
/// NOTE(review): original lines 298-300, 302-303, 305, 308-310, 313-314 and
/// 316-317 are not visible in this view (early returns and the trailing
/// arguments of the final call) -- confirm against the full source.
296 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
// Only a base pointer that is itself loaded from memory can have an origin.
297 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
// The load must be part of the SCoP under construction.
301 if (!S
->contains(BasePtrLI
))
// Trace the loaded pointer back to the base of its address expression.
304 ScalarEvolution
&SE
= *S
->getSE();
306 auto *OriginBaseSCEV
=
307 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
311 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
312 if (!OriginBaseSCEVUnknown
)
// Look up the array info registered for the origin base pointer.
315 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
/// Construct a ScopArrayInfo for base pointer @p BasePtr with element type
/// @p ElementType, dimension sizes @p Sizes and kind @p Kind in SCoP @p S.
/// NOTE(review): original lines 325, 330-332, 335-337 and 341-342 are not
/// visible in this view (the other branch of the name computation and
/// surrounding control flow) -- confirm against the full source.
319 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
320 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
321 const DataLayout
&DL
, Scop
*S
,
322 const char *BaseName
)
323 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
// Derive an isl-compatible name for the array; PHI arrays get a "__phi"
// suffix, and LLVM-IR value names are only used when UseInstructionNames
// is set.
324 std::string BasePtrName
=
326 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
327 Kind
== MemoryKind::PHI
? "__phi" : "",
328 UseInstructionNames
);
329 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
// Only real arrays with a known base pointer can have a base-pointer
// origin; scalars/PHIs get none.
333 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
334 BasePtrOriginSAI
= nullptr;
338 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
339 if (BasePtrOriginSAI
)
340 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
// Out-of-line defaulted destructor.
343 ScopArrayInfo::~ScopArrayInfo() = default;
/// Return the isl set space of this array (one set dimension per array
/// dimension), tagged with the array's id.
/// NOTE(review): original line 348 (presumably "return Space;" plus the
/// closing brace) is not visible in this view.
345 isl::space
ScopArrayInfo::getSpace() const {
346 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
347 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
/// Return true if no write access in the SCoP touches this array.
351 bool ScopArrayInfo::isReadOnly() {
// Collect the ranges of all writes in the SCoP and restrict them to this
// array's space; the array is read-only iff that restriction is empty.
352 isl::union_set WriteSet
= S
.getWrites().range();
353 isl::space Space
= getSpace();
354 WriteSet
= WriteSet
.extract_set(Space
);
356 return bool(WriteSet
.is_empty());
/// Return true if @p Array has the same element type, number of dimensions
/// and per-dimension sizes as this array.
/// NOTE(review): the early-return bodies and the final return (original
/// lines 361-362, 364-365, 368-371) are not visible in this view.
359 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
360 if (Array
->getElementType() != getElementType())
363 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
366 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
367 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
/// Update the element type so it is compatible with both the current type
/// and @p NewElementType (sizes taken from the DataLayout, in bits).
/// NOTE(review): original lines 375-376, 379, 381-382, 385 and 388-389 are
/// not visible in this view (early returns and block closings) -- confirm
/// against the full source.
373 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
374 if (NewElementType
== ElementType
)
377 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
378 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
380 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
// If the new size evenly divides into the old one and is smaller, adopt
// the new (smaller) type; otherwise fall back to an integer type whose
// width is the GCD of both sizes.
383 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
384 ElementType
= NewElementType
;
386 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
387 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
391 /// Make the ScopArrayInfo model a Fortran Array
/// Installs the Fortran array descriptor @p FAD and replaces the outermost
/// dimension size by a fresh isl parameter named "<array>_fortranarr_size".
/// NOTE(review): several original lines (394, 397-399, 401-403, 405, 409,
/// 411, 413, 415-416) are not visible in this view, including the
/// declaration of PwAff -- confirm against the full source.
392 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
393 assert(FAD
&& "got invalid Fortran array descriptor");
// Re-applying is only legal with the identical descriptor.
395 assert(this->FAD
== FAD
&&
396 "receiving different array descriptors for same array");
400 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
// Build a one-parameter space carrying the Fortran array size parameter.
404 isl::space
Space(S
.getIslCtx(), 1, 0);
406 std::string param_name
= getName();
407 param_name
+= "_fortranarr_size";
408 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
410 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
412 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
// The outermost dimension size is now given by the new parameter.
414 DimensionSizesPw
[0] = PwAff
;
/// Update the dimension sizes of this array to @p NewSizes, checking the
/// shared (innermost) dimensions for consistency when @p CheckConsistency
/// is set, and recompute the piecewise-affine size expressions.
/// NOTE(review): original lines 422, 428-430, 432-434, 437, 440, 442-443
/// and 446-449 are not visible in this view (early returns, loop closings
/// and the final return) -- confirm against the full source.
417 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
418 bool CheckConsistency
) {
419 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
420 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
421 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
// The dimensions shared by both size vectors must agree where both are
// known.
423 if (CheckConsistency
) {
424 for (int i
= 0; i
< SharedDims
; i
++) {
425 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
426 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
427 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
// Nothing to adopt if the known sizes already cover at least as many
// dimensions.
431 if (DimensionSizes
.size() >= NewSizes
.size())
// Adopt the new sizes and rebuild the pw_aff size expressions.
435 DimensionSizes
.clear();
436 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
438 DimensionSizesPw
.clear();
439 for (const SCEV
*Expr
: DimensionSizes
) {
441 DimensionSizesPw
.push_back(nullptr);
444 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
445 DimensionSizesPw
.push_back(Size
);
/// Return the (isl-compatible) name of this array.
450 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
/// Return the size of one array element in bytes, per the DataLayout.
/// NOTE(review): the closing brace (original line 454) is not visible in
/// this view.
452 int ScopArrayInfo::getElemSizeInBytes() const {
453 return DL
.getTypeAllocSize(ElementType
);
/// Return the isl id identifying this array (also used as its base-pointer
/// id).
456 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
458 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
459 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
/// Print element type, name and dimension sizes of this array to @p OS.
/// When @p SizeAsPwAff is set, sizes are printed as isl pw_affs instead of
/// SCEV expressions.
/// NOTE(review): original lines 464, 468, 471-473, 475-477, 480, 482-488
/// and 491 are not visible in this view, including the declaration of the
/// loop variable 'u' -- confirm against the full source.
462 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
463 OS
.indent(8) << *getElementType() << " " << getName();
465 // If this is a Fortran array, then we can print the outermost dimension
466 // as a isl_pw_aff even though there is no SCEV information.
467 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
469 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
470 !getDimensionSize(0)) {
474 for (; u
< getNumberOfDimensions(); u
++) {
478 isl::pw_aff Size
= getDimensionSizePw(u
);
479 OS
<< " " << Size
<< " ";
481 OS
<< *getDimensionSize(u
);
489 if (BasePtrOriginSAI
)
490 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
492 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
/// Return the ScopArrayInfo attached to the output tuple of the access
/// function @p PMA.
495 const ScopArrayInfo
*
496 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
497 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
498 assert(!Id
.is_null() && "Output dimension didn't have an ID");
499 return getFromId(Id
);
/// Recover the ScopArrayInfo stored as the user pointer of the isl id
/// @p Id.
/// NOTE(review): original lines 505-506 (presumably "return SAI;" and the
/// closing brace) are not visible in this view.
502 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
503 void *User
= Id
.get_user();
504 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
/// Fold constant-size inner dimensions of the access relation: rewrite each
/// inner index as index % size and carry floor(index / size) into the next
/// outer dimension, then apply the combined multi_aff to the access.
/// NOTE(review): original lines 513, 517, 522, 524-526, 529-530, 534, 536,
/// 541, 547, 551-552 and 558 are not visible in this view, including the
/// declaration of PrevVar and the "continue" bodies of the guard
/// conditions -- confirm against the full source.
508 void MemoryAccess::wrapConstantDimensions() {
509 auto *SAI
= getScopArrayInfo();
510 isl::space ArraySpace
= SAI
->getSpace();
511 isl::ctx Ctx
= ArraySpace
.get_ctx();
512 unsigned DimsArray
= SAI
->getNumberOfDimensions();
// Start from the identity mapping on the array space and refine it
// dimension by dimension.
514 isl::multi_aff DivModAff
= isl::multi_aff::identity(
515 ArraySpace
.map_from_domain_and_range(ArraySpace
));
516 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
518 // Begin with last dimension, to iteratively carry into higher dimensions.
519 for (int i
= DimsArray
- 1; i
> 0; i
--) {
520 auto *DimSize
= SAI
->getDimensionSize(i
);
521 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
523 // This transformation is not applicable to dimensions with dynamic size.
527 // This transformation is not applicable to dimensions of size zero.
528 if (DimSize
->isZero())
531 isl::val DimSizeVal
=
532 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
533 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
535 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
537 // Compute: index % size
538 // Modulo must apply in the divide of the previous iteration, if any.
539 isl::aff Modulo
= Var
.mod(DimSizeVal
);
540 Modulo
= Modulo
.pullback(DivModAff
);
542 // Compute: floor(index / size)
543 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
544 Divide
= Divide
.floor();
545 Divide
= Divide
.add(PrevVar
);
546 Divide
= Divide
.pullback(DivModAff
);
548 // Apply Modulo and Divide.
549 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
550 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
553 // Apply all modulo/divides on the accesses.
554 isl::map Relation
= AccessRelation
;
555 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
556 Relation
= Relation
.detect_equalities();
557 AccessRelation
= Relation
;
/// Adjust the dimensionality of the access relation to match the
/// dimensionality of the accessed array: pad missing outer dimensions with
/// zero, divide single-subscript accesses by the element size, and model
/// multi-element accesses when the loaded type is larger than the array's
/// canonical element type.
/// NOTE(review): original lines 565, 569, 574, 577, 580, 583, 585, 588,
/// 599-600, 606, 608-609, 611, 614, 624-627, 630, 636 and 643-644 are not
/// visible in this view, including the declarations of LS and C and the
/// condition guarding wrapConstantDimensions -- confirm against the full
/// source.
560 void MemoryAccess::updateDimensionality() {
561 auto *SAI
= getScopArrayInfo();
562 isl::space ArraySpace
= SAI
->getSpace();
563 isl::space AccessSpace
= AccessRelation
.get_space().range();
564 isl::ctx Ctx
= ArraySpace
.get_ctx();
566 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
567 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
568 auto DimsMissing
= DimsArray
- DimsAccess
;
570 auto *BB
= getStatement()->getEntryBlock();
571 auto &DL
= BB
->getModule()->getDataLayout();
572 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
573 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
// Map the access space into the array space: missing outer dimensions are
// fixed to zero, the remaining ones are equated.
575 isl::map Map
= isl::map::from_domain_and_range(
576 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
578 for (unsigned i
= 0; i
< DimsMissing
; i
++)
579 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
581 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
582 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
584 AccessRelation
= AccessRelation
.apply_range(Map
);
586 // For the non delinearized arrays, divide the access function of the last
587 // subscript by the size of the elements in the array.
589 // A stride one array access in C expressed as A[i] is expressed in
590 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
591 // two subsequent values of 'i' index two values that are stored next to
592 // each other in memory. By this division we make this characteristic
593 // obvious again. If the base pointer was accessed with offsets not divisible
594 // by the accesses element size, we will have chosen a smaller ArrayElemSize
595 // that divides the offsets of all accesses to this base pointer.
596 if (DimsAccess
== 1) {
597 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
598 AccessRelation
= AccessRelation
.floordiv_val(V
);
601 // We currently do this only if we added at least one dimension, which means
602 // some dimension's indices have not been specified, an indicator that some
603 // index values have been added together.
604 // TODO: Investigate general usefulness; Effect on unit tests is to make index
605 // expressions more complicated.
607 wrapConstantDimensions();
610 computeBoundsOnAccessRelation(ArrayElemSize
);
612 // Introduce multi-element accesses in case the type loaded by this memory
613 // access is larger than the canonical element type of the array.
615 // An access ((float *)A)[i] to an array char *A is modeled as
616 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
617 if (ElemBytes
> ArrayElemSize
) {
618 assert(ElemBytes
% ArrayElemSize
== 0 &&
619 "Loaded element size should be multiple of canonical element size");
620 isl::map Map
= isl::map::from_domain_and_range(
621 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
622 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
623 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
// Constrain the innermost dimension to the Num consecutive elements
// covered by the wider access: in <= out <= in + Num - 1.
628 LS
= isl::local_space(Map
.get_space());
629 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
631 C
= isl::constraint::alloc_inequality(LS
);
632 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
633 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
634 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
635 Map
= Map
.add_constraint(C
);
637 C
= isl::constraint::alloc_inequality(LS
);
638 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
639 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
640 C
= C
.set_constant_val(isl::val(Ctx
, 0));
641 Map
= Map
.add_constraint(C
);
642 AccessRelation
= AccessRelation
.apply_range(Map
);
/// Return a textual representation of the reduction operator @p RT.
/// NOTE(review): the return type line (original 646), the switch header
/// (648) and the returned string literals (653, 655, 657, 659, 661-662)
/// are not visible in this view -- confirm against the full source.
647 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
649 case MemoryAccess::RT_NONE
:
650 llvm_unreachable("Requested a reduction operator string for a memory "
651 "access which isn't a reduction");
652 case MemoryAccess::RT_ADD
:
654 case MemoryAccess::RT_MUL
:
656 case MemoryAccess::RT_BOR
:
658 case MemoryAccess::RT_BXOR
:
660 case MemoryAccess::RT_BAND
:
663 llvm_unreachable("Unknown reduction type");
/// Return the ScopArrayInfo of the originally accessed array, recovered
/// from the user pointer of the original array id.
/// NOTE(review): original lines 670-671 (presumably an assertion and
/// "return SAI;") are not visible in this view.
666 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
667 isl::id ArrayId
= getArrayId();
668 void *User
= ArrayId
.get_user();
669 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
/// Return the ScopArrayInfo of the latest (possibly changed) accessed
/// array, recovered from the user pointer of the latest array id.
/// NOTE(review): original lines 677-678 (presumably an assertion and
/// "return SAI;") are not visible in this view.
673 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
674 isl::id ArrayId
= getLatestArrayId();
675 void *User
= ArrayId
.get_user();
676 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
/// Return the array id of the original access relation.
/// NOTE(review): the closing brace (original line 682) is not visible in
/// this view.
680 isl::id
MemoryAccess::getOriginalArrayId() const {
681 return AccessRelation
.get_tuple_id(isl::dim::out
);
/// Return the array id of the new access relation if one was set,
/// otherwise the original array id.
/// NOTE(review): the closing brace (original line 688) is not visible in
/// this view.
684 isl::id
MemoryAccess::getLatestArrayId() const {
685 if (!hasNewAccessRelation())
686 return getOriginalArrayId();
687 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
/// Return the lexicographic minimum of the (current) access relation,
/// i.e. a single address per statement instance.
/// NOTE(review): the closing brace (original line 692) is not visible in
/// this view.
690 isl::map
MemoryAccess::getAddressFunction() const {
691 return getAccessRelation().lexmin();
/// Express the address function in terms of the schedule @p USchedule,
/// restricted to this statement's domain.
/// NOTE(review): the return type line (original 694) and the closing brace
/// (704) are not visible in this view.
695 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
696 isl::map Schedule
, ScheduledAccRel
;
697 isl::union_set UDomain
;
699 UDomain
= getStatement()->getDomain();
700 USchedule
= USchedule
.intersect_domain(UDomain
);
701 Schedule
= isl::map::from_union_map(USchedule
);
702 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
703 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
/// Return the original (unmodified) access relation.
/// NOTE(review): the closing brace (original line 708) is not visible in
/// this view.
706 isl::map
MemoryAccess::getOriginalAccessRelation() const {
707 return AccessRelation
;
/// Return the original access relation as a string.
/// NOTE(review): the closing brace (original line 712) is not visible in
/// this view.
710 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
711 return AccessRelation
.to_str();
/// Return the space of the original access relation.
/// NOTE(review): the closing brace (original line 716) is not visible in
/// this view.
714 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
715 return AccessRelation
.get_space();
/// Return the new access relation (may be null if none was set).
/// NOTE(review): the closing brace (original line 720) is not visible in
/// this view.
718 isl::map
MemoryAccess::getNewAccessRelation() const {
719 return NewAccessRelation
;
/// Return the new access relation as a string.
/// NOTE(review): the closing brace (original line 724) is not visible in
/// this view.
722 std::string
MemoryAccess::getNewAccessRelationStr() const {
723 return NewAccessRelation
.to_str();
/// Return the current (latest) access relation as a string.
/// NOTE(review): the closing brace (original line 728) is not visible in
/// this view.
726 std::string
MemoryAccess::getAccessRelationStr() const {
727 return getAccessRelation().to_str();
/// Create a one-dimensional access map from @p Statement's domain to a
/// fresh one-dimensional range space aligned to the domain's parameters.
730 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
731 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
732 Space
= Space
.align_params(Statement
->getDomainSpace());
734 return isl::basic_map::from_domain_and_range(
735 isl::basic_set::universe(Statement
->getDomainSpace()),
736 isl::basic_set::universe(Space
));
739 // Formalize no out-of-bound access assumption
741 // When delinearizing array accesses we optimistically assume that the
742 // delinearized accesses do not access out of bound locations (the subscript
743 // expression of each array evaluates for each statement instance that is
744 // executed to a value that is larger than zero and strictly smaller than the
745 // size of the corresponding dimension). The only exception is the outermost
746 // dimension for which we do not need to assume any upper bound. At this point
747 // we formalize this assumption to ensure that at code generation time the
748 // relevant run-time checks can be generated.
750 // To find the set of constraints necessary to avoid out of bound accesses, we
751 // first build the set of data locations that are not within array bounds. We
752 // then apply the reverse access relation to obtain the set of iterations that
753 // may contain invalid accesses and reduce this set of iterations to the ones
754 // that are actually executed by intersecting them with the domain of the
755 // statement. If we now project out all loop dimensions, we obtain a set of
756 // parameters that may cause statement instances to be executed that may
757 // possibly yield out of bound memory accesses. The complement of these
758 // constraints is the set of constraints that needs to be assumed to ensure such
759 // statement instances are never executed.
// NOTE(review): original lines 762, 770, 776, 778-779, 783, 792 and
// 796-797 are not visible in this view (the early return, a loop closing,
// the else-branch of the Loc initializer and the trailing arguments of
// recordAssumption) -- confirm against the full source.
760 void MemoryAccess::assumeNoOutOfBound() {
761 if (PollyIgnoreInbounds
)
763 auto *SAI
= getScopArrayInfo();
764 isl::space Space
= getOriginalAccessRelationSpace().range();
765 isl::set Outside
= isl::set::empty(Space
);
// For every dimension but the outermost, collect the data locations below
// zero or at/above the dimension size.
766 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
767 isl::local_space
LS(Space
);
768 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
769 isl::pw_aff Zero
= isl::pw_aff(LS
);
771 isl::set DimOutside
= Var
.lt_set(Zero
);
772 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
773 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
774 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
775 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
777 Outside
= Outside
.unite(DimOutside
);
// Map the invalid data locations back to iterations, restrict to the
// actually executed iterations and project down to parameters.
780 Outside
= Outside
.apply(getAccessRelation().reverse());
781 Outside
= Outside
.intersect(Statement
->getDomain());
782 Outside
= Outside
.params();
784 // Remove divs to avoid the construction of overly complicated assumptions.
785 // Doing so increases the set of parameter combinations that are assumed to
786 // not appear. This is always safe, but may make the resulting run-time check
787 // bail out more often than strictly necessary.
788 Outside
= Outside
.remove_divs();
789 Outside
= Outside
.complement();
790 const auto &Loc
= getAccessInstruction()
791 ? getAccessInstruction()->getDebugLoc()
793 if (!PollyPreciseInbounds
)
794 Outside
= Outside
.gist_params(Statement
->getDomain().params());
795 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
, Loc
,
/// Build the access relation for a memory intrinsic (e.g. memset/memcpy)
/// from its start subscript and length: the accessed range covers the
/// addresses from the start up to start + length.
/// NOTE(review): original lines 802, 805-806, 809, 814, 819 and 821-822
/// are not visible in this view, including the declaration of LengthMap
/// and the assignment consuming the final set_tuple_id result -- confirm
/// against the full source.
799 void MemoryAccess::buildMemIntrinsicAccessRelation() {
800 assert(isMemoryIntrinsic());
801 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
803 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
804 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
// A null length means the extent is unknown; use the universe map.
807 if (Subscripts
[1] == nullptr) {
808 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
810 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
811 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
812 isl::space RangeSpace
= LengthMap
.get_space().range();
813 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
// Offset the (non-negative) length range by the start subscript.
815 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
816 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
817 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
818 LengthMap
= LengthMap
.sum(SubscriptMap
);
820 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
/// Tighten the access relation's range using ScalarEvolution's signed range
/// of the accessed pointer (relative to its base), scaled by
/// @p ElementSize.
/// NOTE(review): original lines 825, 828-829, 832-833, 836-837, 841,
/// 844-845, 847-848, 850, 855, 858, 860, 864 and 866 are not visible in
/// this view (early returns and the trailing argument of the
/// addRangeBoundsToSet call) -- confirm against the full source.
823 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
824 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
826 auto MAI
= MemAccInst(getAccessInstruction());
827 if (isa
<MemIntrinsic
>(MAI
))
830 Value
*Ptr
= MAI
.getPointerOperand();
831 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
834 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
835 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
// Work with the offset from the base pointer, if the base is known.
838 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
839 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
840 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
842 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
843 if (Range
.isFullSet())
846 if (Range
.isUpperWrapped() || Range
.isSignWrappedSet())
849 bool isWrapping
= Range
.isSignWrappedSet();
// Pick the raw wrap boundaries for wrapped ranges, otherwise the signed
// extremes, and convert byte offsets into element indices.
851 unsigned BW
= Range
.getBitWidth();
852 const auto One
= APInt(BW
, 1);
853 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
854 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
856 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
857 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
859 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
861 isl::map Relation
= AccessRelation
;
862 isl::set AccessRange
= Relation
.range();
863 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
865 AccessRelation
= Relation
.intersect_range(AccessRange
);
/// Fold dimension over-/underflows of the delinearized access relation into
/// the next outer dimension, using the parametric dimension sizes. The new
/// relation is only kept if it does not increase the number of disjuncts
/// (unless -polly-precise-fold-accesses is set).
/// NOTE(review): original lines 870-871, 873, 875, 877, 880, 883, 887, 889,
/// 894, 899, 901, 913, 916-917, 924, 930 and 932-933 are not visible in
/// this view, including the declarations of Space (inside the loop) and C,
/// and the bodies of the guard at the end -- confirm against the full
/// source.
868 void MemoryAccess::foldAccessRelation() {
869 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
872 int Size
= Subscripts
.size();
874 isl::map NewAccessRelation
= AccessRelation
;
876 for (int i
= Size
- 2; i
>= 0; --i
) {
878 isl::map MapOne
, MapTwo
;
879 isl::pw_aff DimSize
= getPwAff(Sizes
[i
+ 1]);
881 isl::space SpaceSize
= DimSize
.get_space();
882 isl::id ParamId
= SpaceSize
.get_dim_id(isl::dim::param
, 0);
884 Space
= AccessRelation
.get_space();
885 Space
= Space
.range().map_from_set();
886 Space
= Space
.align_params(SpaceSize
);
888 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
// MapOne: identity for non-negative inner indices.
890 MapOne
= isl::map::universe(Space
);
891 for (int j
= 0; j
< Size
; ++j
)
892 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
893 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
// MapTwo: for negative inner indices, borrow one from the outer
// dimension and add the dimension-size parameter to the inner index.
895 MapTwo
= isl::map::universe(Space
);
896 for (int j
= 0; j
< Size
; ++j
)
897 if (j
< i
|| j
> i
+ 1)
898 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
900 isl::local_space
LS(Space
);
902 C
= isl::constraint::alloc_equality(LS
);
903 C
= C
.set_constant_si(-1);
904 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
905 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
906 MapTwo
= MapTwo
.add_constraint(C
);
907 C
= isl::constraint::alloc_equality(LS
);
908 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
909 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
910 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
911 MapTwo
= MapTwo
.add_constraint(C
);
912 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
914 MapOne
= MapOne
.unite(MapTwo
);
915 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
// Re-attach the statement and array tuple ids and simplify with respect
// to the statement's domain.
918 isl::id BaseAddrId
= getScopArrayInfo()->getBasePtrId();
919 isl::space Space
= Statement
->getDomainSpace();
920 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
921 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
922 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
923 NewAccessRelation
= NewAccessRelation
.gist_domain(Statement
->getDomain());
925 // Access dimension folding might in certain cases increase the number of
926 // disjuncts in the memory access, which can possibly complicate the generated
927 // run-time checks and can lead to costly compilation.
928 if (!PollyPreciseFoldAccesses
&&
929 NewAccessRelation
.n_basic_map() > AccessRelation
.n_basic_map()) {
931 AccessRelation
= NewAccessRelation
;
935 /// Check if @p Expr is divisible by @p Size.
936 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
941 // Only one factor needs to be divisible.
942 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
943 for (auto *FactorExpr
: MulExpr
->operands())
944 if (isDivisible(FactorExpr
, Size
, SE
))
949 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
951 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
952 for (auto *OpExpr
: NAryExpr
->operands())
953 if (!isDivisible(OpExpr
, Size
, SE
))
958 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
959 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
960 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
961 return MulSCEV
== Expr
;
964 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
965 assert(AccessRelation
.is_null() && "AccessRelation already built");
967 // Initialize the invalid domain which describes all iterations for which the
968 // access relation is not modeled correctly.
969 isl::set StmtInvalidDomain
= getStatement()->getInvalidDomain();
970 InvalidDomain
= isl::set::empty(StmtInvalidDomain
.get_space());
972 isl::ctx Ctx
= Id
.get_ctx();
973 isl::id BaseAddrId
= SAI
->getBasePtrId();
975 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
976 buildMemIntrinsicAccessRelation();
977 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
982 // We overapproximate non-affine accesses with a possible access to the
983 // whole array. For read accesses it does not make a difference, if an
984 // access must or may happen. However, for write accesses it is important to
985 // differentiate between writes that must happen and writes that may happen.
986 if (AccessRelation
.is_null())
987 AccessRelation
= createBasicAccessMap(Statement
);
989 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
993 isl::space Space
= isl::space(Ctx
, 0, Statement
->getNumIterators(), 0);
994 AccessRelation
= isl::map::universe(Space
);
996 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
997 isl::pw_aff Affine
= getPwAff(Subscripts
[i
]);
998 isl::map SubscriptMap
= isl::map::from_pw_aff(Affine
);
999 AccessRelation
= AccessRelation
.flat_range_product(SubscriptMap
);
1002 Space
= Statement
->getDomainSpace();
1003 AccessRelation
= AccessRelation
.set_tuple_id(
1004 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
1005 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1007 AccessRelation
= AccessRelation
.gist_domain(Statement
->getDomain());
1010 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1011 AccessType AccType
, Value
*BaseAddress
,
1012 Type
*ElementType
, bool Affine
,
1013 ArrayRef
<const SCEV
*> Subscripts
,
1014 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1016 : Kind(Kind
), AccType(AccType
), Statement(Stmt
), InvalidDomain(nullptr),
1017 BaseAddr(BaseAddress
), ElementType(ElementType
),
1018 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1019 AccessValue(AccessValue
), IsAffine(Affine
),
1020 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1021 NewAccessRelation(nullptr), FAD(nullptr) {
1022 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1023 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1025 std::string IdName
= Stmt
->getBaseName() + Access
;
1026 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1029 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
, isl::map AccRel
)
1030 : Kind(MemoryKind::Array
), AccType(AccType
), Statement(Stmt
),
1031 InvalidDomain(nullptr), AccessRelation(nullptr),
1032 NewAccessRelation(AccRel
), FAD(nullptr) {
1033 isl::id ArrayInfoId
= NewAccessRelation
.get_tuple_id(isl::dim::out
);
1034 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1035 Sizes
.push_back(nullptr);
1036 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1037 Sizes
.push_back(SAI
->getDimensionSize(i
));
1038 ElementType
= SAI
->getElementType();
1039 BaseAddr
= SAI
->getBasePtr();
1040 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1041 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1043 std::string IdName
= Stmt
->getBaseName() + Access
;
1044 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1047 MemoryAccess::~MemoryAccess() = default;
1049 void MemoryAccess::realignParams() {
1050 isl::set Ctx
= Statement
->getParent()->getContext();
1051 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1052 AccessRelation
= AccessRelation
.gist_params(Ctx
);
1055 const std::string
MemoryAccess::getReductionOperatorStr() const {
1056 return MemoryAccess::getReductionOperatorStr(getReductionType());
1059 isl::id
MemoryAccess::getId() const { return Id
; }
1061 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1062 MemoryAccess::ReductionType RT
) {
1063 if (RT
== MemoryAccess::RT_NONE
)
1066 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1070 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1072 void MemoryAccess::print(raw_ostream
&OS
) const {
1075 OS
.indent(12) << "ReadAccess :=\t";
1078 OS
.indent(12) << "MustWriteAccess :=\t";
1081 OS
.indent(12) << "MayWriteAccess :=\t";
1085 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1088 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1092 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1093 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1094 if (hasNewAccessRelation())
1095 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1098 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1099 LLVM_DUMP_METHOD
void MemoryAccess::dump() const { print(errs()); }
1102 isl::pw_aff
MemoryAccess::getPwAff(const SCEV
*E
) {
1103 auto *Stmt
= getStatement();
1104 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1105 isl::set StmtDom
= getStatement()->getDomain();
1106 StmtDom
= StmtDom
.reset_tuple_id();
1107 isl::set NewInvalidDom
= StmtDom
.intersect(PWAC
.second
);
1108 InvalidDomain
= InvalidDomain
.unite(NewInvalidDom
);
1112 // Create a map in the size of the provided set domain, that maps from the
1113 // one element of the provided set domain to another element of the provided
1115 // The mapping is limited to all points that are equal in all but the last
1116 // dimension and for which the last dimension of the input is strict smaller
1117 // than the last dimension of the output.
1119 // getEqualAndLarger(set[i0, i1, ..., iX]):
1121 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1122 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1124 static isl::map
getEqualAndLarger(isl::space SetDomain
) {
1125 isl::space Space
= SetDomain
.map_from_set();
1126 isl::map Map
= isl::map::universe(Space
);
1127 unsigned lastDimension
= Map
.dim(isl::dim::in
) - 1;
1129 // Set all but the last dimension to be equal for the input and output
1131 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1132 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1133 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1134 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
1136 // Set the last dimension of the input to be strict smaller than the
1137 // last dimension of the output.
1139 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1140 Map
= Map
.order_lt(isl::dim::in
, lastDimension
, isl::dim::out
, lastDimension
);
1144 isl::set
MemoryAccess::getStride(isl::map Schedule
) const {
1145 isl::map AccessRelation
= getAccessRelation();
1146 isl::space Space
= Schedule
.get_space().range();
1147 isl::map NextScatt
= getEqualAndLarger(Space
);
1149 Schedule
= Schedule
.reverse();
1150 NextScatt
= NextScatt
.lexmin();
1152 NextScatt
= NextScatt
.apply_range(Schedule
);
1153 NextScatt
= NextScatt
.apply_range(AccessRelation
);
1154 NextScatt
= NextScatt
.apply_domain(Schedule
);
1155 NextScatt
= NextScatt
.apply_domain(AccessRelation
);
1157 isl::set Deltas
= NextScatt
.deltas();
1161 bool MemoryAccess::isStrideX(isl::map Schedule
, int StrideWidth
) const {
1162 isl::set Stride
, StrideX
;
1165 Stride
= getStride(Schedule
);
1166 StrideX
= isl::set::universe(Stride
.get_space());
1167 for (unsigned i
= 0; i
< StrideX
.dim(isl::dim::set
) - 1; i
++)
1168 StrideX
= StrideX
.fix_si(isl::dim::set
, i
, 0);
1169 StrideX
= StrideX
.fix_si(isl::dim::set
, StrideX
.dim(isl::dim::set
) - 1,
1171 IsStrideX
= Stride
.is_subset(StrideX
);
1176 bool MemoryAccess::isStrideZero(isl::map Schedule
) const {
1177 return isStrideX(Schedule
, 0);
1180 bool MemoryAccess::isStrideOne(isl::map Schedule
) const {
1181 return isStrideX(Schedule
, 1);
1184 void MemoryAccess::setAccessRelation(isl::map NewAccess
) {
1185 AccessRelation
= NewAccess
;
1188 void MemoryAccess::setNewAccessRelation(isl::map NewAccess
) {
1192 // Check domain space compatibility.
1193 isl::space NewSpace
= NewAccess
.get_space();
1194 isl::space NewDomainSpace
= NewSpace
.domain();
1195 isl::space OriginalDomainSpace
= getStatement()->getDomainSpace();
1196 assert(OriginalDomainSpace
.has_equal_tuples(NewDomainSpace
));
1198 // Reads must be executed unconditionally. Writes might be executed in a
1201 // Check whether there is an access for every statement instance.
1202 isl::set StmtDomain
= getStatement()->getDomain();
1204 StmtDomain
.intersect_params(getStatement()->getParent()->getContext());
1205 isl::set NewDomain
= NewAccess
.domain();
1206 assert(StmtDomain
.is_subset(NewDomain
) &&
1207 "Partial READ accesses not supported");
1210 isl::space NewAccessSpace
= NewAccess
.get_space();
1211 assert(NewAccessSpace
.has_tuple_id(isl::dim::set
) &&
1212 "Must specify the array that is accessed");
1213 isl::id NewArrayId
= NewAccessSpace
.get_tuple_id(isl::dim::set
);
1214 auto *SAI
= static_cast<ScopArrayInfo
*>(NewArrayId
.get_user());
1215 assert(SAI
&& "Must set a ScopArrayInfo");
1217 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1218 InvariantEquivClassTy
*EqClass
=
1219 getStatement()->getParent()->lookupInvariantEquivClass(
1222 "Access functions to indirect arrays must have an invariant and "
1223 "hoisted base pointer");
1226 // Check whether access dimensions correspond to number of dimensions of the
1228 auto Dims
= SAI
->getNumberOfDimensions();
1229 assert(NewAccessSpace
.dim(isl::dim::set
) == Dims
&&
1230 "Access dims must match array dims");
1233 NewAccess
= NewAccess
.gist_domain(getStatement()->getDomain());
1234 NewAccessRelation
= NewAccess
;
1237 bool MemoryAccess::isLatestPartialAccess() const {
1238 isl::set StmtDom
= getStatement()->getDomain();
1239 isl::set AccDom
= getLatestAccessRelation().domain();
1241 return !StmtDom
.is_subset(AccDom
);
1244 //===----------------------------------------------------------------------===//
1246 isl::map
ScopStmt::getSchedule() const {
1247 isl::set Domain
= getDomain();
1248 if (Domain
.is_empty())
1249 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1250 auto Schedule
= getParent()->getSchedule();
1253 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1254 if (Schedule
.is_empty())
1255 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1256 isl::map M
= M
.from_union_map(Schedule
);
1258 M
= M
.gist_domain(Domain
);
1263 void ScopStmt::restrictDomain(isl::set NewDomain
) {
1264 assert(NewDomain
.is_subset(Domain
) &&
1265 "New domain is not a subset of old domain!");
1269 void ScopStmt::addAccess(MemoryAccess
*Access
, bool Prepend
) {
1270 Instruction
*AccessInst
= Access
->getAccessInstruction();
1272 if (Access
->isArrayKind()) {
1273 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1274 MAL
.emplace_front(Access
);
1275 } else if (Access
->isValueKind() && Access
->isWrite()) {
1276 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1277 assert(!ValueWrites
.lookup(AccessVal
));
1279 ValueWrites
[AccessVal
] = Access
;
1280 } else if (Access
->isValueKind() && Access
->isRead()) {
1281 Value
*AccessVal
= Access
->getAccessValue();
1282 assert(!ValueReads
.lookup(AccessVal
));
1284 ValueReads
[AccessVal
] = Access
;
1285 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1286 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1287 assert(!PHIWrites
.lookup(PHI
));
1289 PHIWrites
[PHI
] = Access
;
1290 } else if (Access
->isAnyPHIKind() && Access
->isRead()) {
1291 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1292 assert(!PHIReads
.lookup(PHI
));
1294 PHIReads
[PHI
] = Access
;
1298 MemAccs
.insert(MemAccs
.begin(), Access
);
1301 MemAccs
.push_back(Access
);
1304 void ScopStmt::realignParams() {
1305 for (MemoryAccess
*MA
: *this)
1306 MA
->realignParams();
1308 isl::set Ctx
= Parent
.getContext();
1309 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1310 Domain
= Domain
.gist_params(Ctx
);
1313 /// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
1314 static isl::set
collectBoundedParts(isl::set S
) {
1315 isl::set BoundedParts
= isl::set::empty(S
.get_space());
1316 for (isl::basic_set BSet
: S
.get_basic_set_list())
1317 if (BSet
.is_bounded())
1318 BoundedParts
= BoundedParts
.unite(isl::set(BSet
));
1319 return BoundedParts
;
1322 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1324 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1325 /// both with regards to the dimension @p Dim.
1326 static std::pair
<isl::set
, isl::set
> partitionSetParts(isl::set S
,
1328 for (unsigned u
= 0, e
= S
.n_dim(); u
< e
; u
++)
1329 S
= S
.lower_bound_si(isl::dim::set
, u
, 0);
1331 unsigned NumDimsS
= S
.n_dim();
1332 isl::set OnlyDimS
= S
;
1334 // Remove dimensions that are greater than Dim as they are not interesting.
1335 assert(NumDimsS
>= Dim
+ 1);
1336 OnlyDimS
= OnlyDimS
.project_out(isl::dim::set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1338 // Create artificial parametric upper bounds for dimensions smaller than Dim
1339 // as we are not interested in them.
1340 OnlyDimS
= OnlyDimS
.insert_dims(isl::dim::param
, 0, Dim
);
1342 for (unsigned u
= 0; u
< Dim
; u
++) {
1343 isl::constraint C
= isl::constraint::alloc_inequality(
1344 isl::local_space(OnlyDimS
.get_space()));
1345 C
= C
.set_coefficient_si(isl::dim::param
, u
, 1);
1346 C
= C
.set_coefficient_si(isl::dim::set
, u
, -1);
1347 OnlyDimS
= OnlyDimS
.add_constraint(C
);
1350 // Collect all bounded parts of OnlyDimS.
1351 isl::set BoundedParts
= collectBoundedParts(OnlyDimS
);
1353 // Create the dimensions greater than Dim again.
1355 BoundedParts
.insert_dims(isl::dim::set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1357 // Remove the artificial upper bound parameters again.
1358 BoundedParts
= BoundedParts
.remove_dims(isl::dim::param
, 0, Dim
);
1360 isl::set UnboundedParts
= S
.subtract(BoundedParts
);
1361 return std::make_pair(UnboundedParts
, BoundedParts
);
1364 /// Create the conditions under which @p L @p Pred @p R is true.
1365 static isl::set
buildConditionSet(ICmpInst::Predicate Pred
, isl::pw_aff L
,
1368 case ICmpInst::ICMP_EQ
:
1370 case ICmpInst::ICMP_NE
:
1372 case ICmpInst::ICMP_SLT
:
1374 case ICmpInst::ICMP_SLE
:
1376 case ICmpInst::ICMP_SGT
:
1378 case ICmpInst::ICMP_SGE
:
1380 case ICmpInst::ICMP_ULT
:
1382 case ICmpInst::ICMP_UGT
:
1384 case ICmpInst::ICMP_ULE
:
1386 case ICmpInst::ICMP_UGE
:
1389 llvm_unreachable("Non integer predicate not supported");
1393 /// Compute the isl representation for the SCEV @p E in this BB.
1395 /// @param S The Scop in which @p BB resides in.
1396 /// @param BB The BB for which isl representation is to be
1398 /// @param InvalidDomainMap A map of BB to their invalid domains.
1399 /// @param E The SCEV that should be translated.
1400 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
1402 /// Note that this function will also adjust the invalid context accordingly.
1404 __isl_give isl_pw_aff
*
1405 getPwAff(Scop
&S
, BasicBlock
*BB
,
1406 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
, const SCEV
*E
,
1407 bool NonNegative
= false) {
1408 PWACtx PWAC
= S
.getPwAff(E
, BB
, NonNegative
);
1409 InvalidDomainMap
[BB
] = InvalidDomainMap
[BB
].unite(PWAC
.second
);
1410 return PWAC
.first
.release();
1413 /// Build the conditions sets for the switch @p SI in the @p Domain.
1415 /// This will fill @p ConditionSets with the conditions under which control
1416 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1417 /// have as many elements as @p SI has successors.
1418 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, SwitchInst
*SI
, Loop
*L
,
1419 __isl_keep isl_set
*Domain
,
1420 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1421 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1422 Value
*Condition
= getConditionFromTerminator(SI
);
1423 assert(Condition
&& "No condition for switch");
1425 ScalarEvolution
&SE
= *S
.getSE();
1426 isl_pw_aff
*LHS
, *RHS
;
1427 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEVAtScope(Condition
, L
));
1429 unsigned NumSuccessors
= SI
->getNumSuccessors();
1430 ConditionSets
.resize(NumSuccessors
);
1431 for (auto &Case
: SI
->cases()) {
1432 unsigned Idx
= Case
.getSuccessorIndex();
1433 ConstantInt
*CaseValue
= Case
.getCaseValue();
1435 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEV(CaseValue
));
1436 isl_set
*CaseConditionSet
=
1437 buildConditionSet(ICmpInst::ICMP_EQ
, isl::manage_copy(LHS
),
1440 ConditionSets
[Idx
] = isl_set_coalesce(
1441 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1444 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1445 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1446 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1448 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1449 ConditionSets
[0] = isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
);
1451 isl_pw_aff_free(LHS
);
1456 /// Build condition sets for unsigned ICmpInst(s).
1457 /// Special handling is required for unsigned operands to ensure that if
1458 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1459 /// it should wrap around.
1461 /// @param IsStrictUpperBound holds information on the predicate relation
1462 /// between TestVal and UpperBound, i.e,
1463 /// TestVal < UpperBound OR TestVal <= UpperBound
1464 __isl_give isl_set
*
1465 buildUnsignedConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1466 __isl_keep isl_set
*Domain
, const SCEV
*SCEV_TestVal
,
1467 const SCEV
*SCEV_UpperBound
,
1468 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1469 bool IsStrictUpperBound
) {
1470 // Do not take NonNeg assumption on TestVal
1471 // as it might have MSB (Sign bit) set.
1472 isl_pw_aff
*TestVal
= getPwAff(S
, BB
, InvalidDomainMap
, SCEV_TestVal
, false);
1473 // Take NonNeg assumption on UpperBound.
1474 isl_pw_aff
*UpperBound
=
1475 getPwAff(S
, BB
, InvalidDomainMap
, SCEV_UpperBound
, true);
1479 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1480 isl_pw_aff_get_domain_space(TestVal
))),
1481 isl_pw_aff_copy(TestVal
));
1484 if (IsStrictUpperBound
)
1485 // TestVal < UpperBound
1486 Second
= isl_pw_aff_lt_set(TestVal
, UpperBound
);
1488 // TestVal <= UpperBound
1489 Second
= isl_pw_aff_le_set(TestVal
, UpperBound
);
1491 isl_set
*ConsequenceCondSet
= isl_set_intersect(First
, Second
);
1492 return ConsequenceCondSet
;
1495 /// Build the conditions sets for the branch condition @p Condition in
1498 /// This will fill @p ConditionSets with the conditions under which control
1499 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1500 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1501 /// context under which @p Condition is true/false will be returned as the
1502 /// new elements of @p ConditionSets.
1503 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1504 Instruction
*TI
, Loop
*L
, __isl_keep isl_set
*Domain
,
1505 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1506 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1507 ScalarEvolution
&SE
= *S
.getSE();
1508 isl_set
*ConsequenceCondSet
= nullptr;
1510 if (auto Load
= dyn_cast
<LoadInst
>(Condition
)) {
1511 const SCEV
*LHSSCEV
= SE
.getSCEVAtScope(Load
, L
);
1512 const SCEV
*RHSSCEV
= SE
.getZero(LHSSCEV
->getType());
1513 bool NonNeg
= false;
1514 isl_pw_aff
*LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LHSSCEV
, NonNeg
);
1515 isl_pw_aff
*RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RHSSCEV
, NonNeg
);
1516 ConsequenceCondSet
= buildConditionSet(ICmpInst::ICMP_SLE
, isl::manage(LHS
),
1519 } else if (auto *PHI
= dyn_cast
<PHINode
>(Condition
)) {
1520 auto *Unique
= dyn_cast
<ConstantInt
>(
1521 getUniqueNonErrorValue(PHI
, &S
.getRegion(), *S
.getLI(), *S
.getDT()));
1523 if (Unique
->isZero())
1524 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1526 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1527 } else if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1528 if (CCond
->isZero())
1529 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1531 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1532 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1533 auto Opcode
= BinOp
->getOpcode();
1534 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1536 bool Valid
= buildConditionSets(S
, BB
, BinOp
->getOperand(0), TI
, L
, Domain
,
1537 InvalidDomainMap
, ConditionSets
) &&
1538 buildConditionSets(S
, BB
, BinOp
->getOperand(1), TI
, L
, Domain
,
1539 InvalidDomainMap
, ConditionSets
);
1541 while (!ConditionSets
.empty())
1542 isl_set_free(ConditionSets
.pop_back_val());
1546 isl_set_free(ConditionSets
.pop_back_val());
1547 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1548 isl_set_free(ConditionSets
.pop_back_val());
1549 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1551 if (Opcode
== Instruction::And
)
1552 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1554 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1556 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1558 "Condition of exiting branch was neither constant nor ICmp!");
1560 LoopInfo
&LI
= *S
.getLI();
1561 DominatorTree
&DT
= *S
.getDT();
1562 Region
&R
= S
.getRegion();
1564 isl_pw_aff
*LHS
, *RHS
;
1565 // For unsigned comparisons we assumed the signed bit of neither operand
1566 // to be set. The comparison is equal to a signed comparison under this
1568 bool NonNeg
= ICond
->isUnsigned();
1569 const SCEV
*LeftOperand
= SE
.getSCEVAtScope(ICond
->getOperand(0), L
),
1570 *RightOperand
= SE
.getSCEVAtScope(ICond
->getOperand(1), L
);
1572 LeftOperand
= tryForwardThroughPHI(LeftOperand
, R
, SE
, LI
, DT
);
1573 RightOperand
= tryForwardThroughPHI(RightOperand
, R
, SE
, LI
, DT
);
1575 switch (ICond
->getPredicate()) {
1576 case ICmpInst::ICMP_ULT
:
1577 ConsequenceCondSet
=
1578 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1579 RightOperand
, InvalidDomainMap
, true);
1581 case ICmpInst::ICMP_ULE
:
1582 ConsequenceCondSet
=
1583 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1584 RightOperand
, InvalidDomainMap
, false);
1586 case ICmpInst::ICMP_UGT
:
1587 ConsequenceCondSet
=
1588 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1589 LeftOperand
, InvalidDomainMap
, true);
1591 case ICmpInst::ICMP_UGE
:
1592 ConsequenceCondSet
=
1593 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1594 LeftOperand
, InvalidDomainMap
, false);
1597 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LeftOperand
, NonNeg
);
1598 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RightOperand
, NonNeg
);
1599 ConsequenceCondSet
= buildConditionSet(ICond
->getPredicate(),
1600 isl::manage(LHS
), isl::manage(RHS
))
1606 // If no terminator was given we are only looking for parameter constraints
1607 // under which @p Condition is true/false.
1609 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1610 assert(ConsequenceCondSet
);
1611 ConsequenceCondSet
= isl_set_coalesce(
1612 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1614 isl_set
*AlternativeCondSet
= nullptr;
1616 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1619 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1620 isl_set_copy(ConsequenceCondSet
));
1622 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1626 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc(),
1627 TI
? TI
->getParent() : nullptr /* BasicBlock */);
1628 isl_set_free(AlternativeCondSet
);
1629 isl_set_free(ConsequenceCondSet
);
1633 ConditionSets
.push_back(ConsequenceCondSet
);
1634 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1639 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1641 /// This will fill @p ConditionSets with the conditions under which control
1642 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1643 /// have as many elements as @p TI has successors.
1644 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, Instruction
*TI
, Loop
*L
,
1645 __isl_keep isl_set
*Domain
,
1646 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1647 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1648 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1649 return buildConditionSets(S
, BB
, SI
, L
, Domain
, InvalidDomainMap
,
1652 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1654 if (TI
->getNumSuccessors() == 1) {
1655 ConditionSets
.push_back(isl_set_copy(Domain
));
1659 Value
*Condition
= getConditionFromTerminator(TI
);
1660 assert(Condition
&& "No condition for Terminator");
1662 return buildConditionSets(S
, BB
, Condition
, TI
, L
, Domain
, InvalidDomainMap
,
1666 ScopStmt::ScopStmt(Scop
&parent
, Region
&R
, StringRef Name
,
1667 Loop
*SurroundingLoop
,
1668 std::vector
<Instruction
*> EntryBlockInstructions
)
1669 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), R(&R
),
1670 Build(nullptr), BaseName(Name
), SurroundingLoop(SurroundingLoop
),
1671 Instructions(EntryBlockInstructions
) {}
1673 ScopStmt::ScopStmt(Scop
&parent
, BasicBlock
&bb
, StringRef Name
,
1674 Loop
*SurroundingLoop
,
1675 std::vector
<Instruction
*> Instructions
)
1676 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(&bb
),
1677 Build(nullptr), BaseName(Name
), SurroundingLoop(SurroundingLoop
),
1678 Instructions(Instructions
) {}
1680 ScopStmt::ScopStmt(Scop
&parent
, isl::map SourceRel
, isl::map TargetRel
,
1682 : Parent(parent
), InvalidDomain(nullptr), Domain(NewDomain
),
1684 BaseName
= getIslCompatibleName("CopyStmt_", "",
1685 std::to_string(parent
.getCopyStmtsNum()));
1686 isl::id Id
= isl::id::alloc(getIslCtx(), getBaseName(), this);
1687 Domain
= Domain
.set_tuple_id(Id
);
1688 TargetRel
= TargetRel
.set_tuple_id(isl::dim::in
, Id
);
1690 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE
, TargetRel
);
1691 parent
.addAccessFunction(Access
);
1693 SourceRel
= SourceRel
.set_tuple_id(isl::dim::in
, Id
);
1694 Access
= new MemoryAccess(this, MemoryAccess::AccessType::READ
, SourceRel
);
1695 parent
.addAccessFunction(Access
);
1699 ScopStmt::~ScopStmt() = default;
1701 std::string
ScopStmt::getDomainStr() const { return Domain
.to_str(); }
1703 std::string
ScopStmt::getScheduleStr() const {
1704 auto *S
= getSchedule().release();
1707 auto Str
= stringFromIslObj(S
);
1712 void ScopStmt::setInvalidDomain(isl::set ID
) { InvalidDomain
= ID
; }
1714 BasicBlock
*ScopStmt::getEntryBlock() const {
1716 return getBasicBlock();
1717 return getRegion()->getEntry();
1720 unsigned ScopStmt::getNumIterators() const { return NestLoops
.size(); }
1722 const char *ScopStmt::getBaseName() const { return BaseName
.c_str(); }
1724 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1725 return NestLoops
[Dimension
];
1728 isl::ctx
ScopStmt::getIslCtx() const { return Parent
.getIslCtx(); }
1730 isl::set
ScopStmt::getDomain() const { return Domain
; }
1732 isl::space
ScopStmt::getDomainSpace() const { return Domain
.get_space(); }
1734 isl::id
ScopStmt::getDomainId() const { return Domain
.get_tuple_id(); }
1736 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1737 OS
<< "Instructions {\n";
1739 for (Instruction
*Inst
: Instructions
)
1740 OS
.indent(16) << *Inst
<< "\n";
1742 OS
.indent(12) << "}\n";
1745 void ScopStmt::print(raw_ostream
&OS
, bool PrintInstructions
) const {
1746 OS
<< "\t" << getBaseName() << "\n";
1747 OS
.indent(12) << "Domain :=\n";
1750 OS
.indent(16) << getDomainStr() << ";\n";
1752 OS
.indent(16) << "n/a\n";
1754 OS
.indent(12) << "Schedule :=\n";
1757 OS
.indent(16) << getScheduleStr() << ";\n";
1759 OS
.indent(16) << "n/a\n";
1761 for (MemoryAccess
*Access
: MemAccs
)
1764 if (PrintInstructions
)
1765 printInstructions(OS
.indent(12));
1768 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1769 LLVM_DUMP_METHOD
void ScopStmt::dump() const { print(dbgs(), true); }
1772 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
1773 if (MA
->isRead() && MA
->isOriginalValueKind()) {
1774 bool Found
= ValueReads
.erase(MA
->getAccessValue());
1776 assert(Found
&& "Expected access data not found");
1778 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
1779 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
1781 assert(Found
&& "Expected access data not found");
1783 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
1784 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1786 assert(Found
&& "Expected access data not found");
1788 if (MA
->isRead() && MA
->isOriginalAnyPHIKind()) {
1789 bool Found
= PHIReads
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1791 assert(Found
&& "Expected access data not found");
1795 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
1796 // Remove the memory accesses from this statement together with all scalar
1797 // accesses that were caused by it. MemoryKind::Value READs have no access
1798 // instruction, hence would not be removed by this function. However, it is
1799 // only used for invariant LoadInst accesses, its arguments are always affine,
1800 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1801 // accesses to be removed.
1802 auto Predicate
= [&](MemoryAccess
*Acc
) {
1803 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
1805 for (auto *MA
: MemAccs
) {
1806 if (Predicate(MA
)) {
1807 removeAccessData(MA
);
1808 Parent
.removeAccessData(MA
);
1811 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
1813 InstructionToAccess
.erase(MA
->getAccessInstruction());
1816 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
, bool AfterHoisting
) {
1817 if (AfterHoisting
) {
1818 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
1819 assert(MAIt
!= MemAccs
.end());
1820 MemAccs
.erase(MAIt
);
1822 removeAccessData(MA
);
1823 Parent
.removeAccessData(MA
);
1826 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
1827 if (It
!= InstructionToAccess
.end()) {
1828 It
->second
.remove(MA
);
1829 if (It
->second
.empty())
1830 InstructionToAccess
.erase(MA
->getAccessInstruction());
1834 MemoryAccess
*ScopStmt::ensureValueRead(Value
*V
) {
1835 MemoryAccess
*Access
= lookupInputAccessOf(V
);
1839 ScopArrayInfo
*SAI
=
1840 Parent
.getOrCreateScopArrayInfo(V
, V
->getType(), {}, MemoryKind::Value
);
1841 Access
= new MemoryAccess(this, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1842 true, {}, {}, V
, MemoryKind::Value
);
1843 Parent
.addAccessFunction(Access
);
1844 Access
->buildAccessRelation(SAI
);
1846 Parent
.addAccessData(Access
);
1850 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const ScopStmt
&S
) {
1851 S
.print(OS
, PollyPrintInstructions
);
1855 //===----------------------------------------------------------------------===//
1856 /// Scop class implement
1858 void Scop::setContext(isl::set NewContext
) {
1859 Context
= NewContext
.align_params(Context
.get_space());
1864 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1865 struct SCEVSensitiveParameterRewriter
1866 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
1867 const ValueToValueMap
&VMap
;
1870 SCEVSensitiveParameterRewriter(const ValueToValueMap
&VMap
,
1871 ScalarEvolution
&SE
)
1872 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
1874 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
1875 const ValueToValueMap
&VMap
) {
1876 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
1877 return SSPR
.visit(E
);
1880 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
1881 auto *Start
= visit(E
->getStart());
1882 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
1883 visit(E
->getStepRecurrence(SE
)),
1884 E
->getLoop(), SCEV::FlagAnyWrap
);
1885 return SE
.getAddExpr(Start
, AddRec
);
1888 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
1889 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
1890 return SE
.getUnknown(NewValue
);
1895 /// Check whether we should remap a SCEV expression.
1896 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
1897 const ValueToValueMap
&VMap
;
1898 bool FoundInside
= false;
1902 SCEVFindInsideScop(const ValueToValueMap
&VMap
, ScalarEvolution
&SE
,
1904 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
1906 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
1907 const ValueToValueMap
&VMap
, const Scop
*S
) {
1908 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
1910 return SFIS
.FoundInside
;
1913 bool follow(const SCEV
*E
) {
1914 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
1915 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
1916 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
1917 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
1918 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
1920 return !FoundInside
;
1923 bool isDone() { return FoundInside
; }
1925 } // end anonymous namespace
1927 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) const {
1928 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
1929 // doesn't like addition between an AddRec and an expression that
1930 // doesn't have a dominance relationship with it.)
1931 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
1935 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
1938 // This table of function names is used to translate parameter names in more
1939 // human-readable names. This makes it easier to interpret Polly analysis
1941 StringMap
<std::string
> KnownNames
= {
1942 {"_Z13get_global_idj", "global_id"},
1943 {"_Z12get_local_idj", "local_id"},
1944 {"_Z15get_global_sizej", "global_size"},
1945 {"_Z14get_local_sizej", "local_size"},
1946 {"_Z12get_work_dimv", "work_dim"},
1947 {"_Z17get_global_offsetj", "global_offset"},
1948 {"_Z12get_group_idj", "group_id"},
1949 {"_Z14get_num_groupsj", "num_groups"},
1952 static std::string
getCallParamName(CallInst
*Call
) {
1954 raw_string_ostream
OS(Result
);
1955 std::string Name
= Call
->getCalledFunction()->getName();
1957 auto Iterator
= KnownNames
.find(Name
);
1958 if (Iterator
!= KnownNames
.end())
1959 Name
= "__" + Iterator
->getValue();
1961 for (auto &Operand
: Call
->arg_operands()) {
1962 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
1963 OS
<< "_" << Op
->getValue();
1969 void Scop::createParameterId(const SCEV
*Parameter
) {
1970 assert(Parameters
.count(Parameter
));
1971 assert(!ParameterIds
.count(Parameter
));
1973 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
1975 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
1976 Value
*Val
= ValueParameter
->getValue();
1977 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
1979 if (Call
&& isConstCall(Call
)) {
1980 ParameterName
= getCallParamName(Call
);
1981 } else if (UseInstructionNames
) {
1982 // If this parameter references a specific Value and this value has a name
1983 // we use this name as it is likely to be unique and more useful than just
1986 ParameterName
= Val
->getName();
1987 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
1988 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
1989 if (LoadOrigin
->hasName()) {
1990 ParameterName
+= "_loaded_from_";
1992 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
1997 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2000 isl::id Id
= isl::id::alloc(getIslCtx(), ParameterName
,
2001 const_cast<void *>((const void *)Parameter
));
2002 ParameterIds
[Parameter
] = Id
;
2005 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2006 for (const SCEV
*Parameter
: NewParameters
) {
2007 // Normalize the SCEV to get the representing element for an invariant load.
2008 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2009 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2011 if (Parameters
.insert(Parameter
))
2012 createParameterId(Parameter
);
2016 isl::id
Scop::getIdForParam(const SCEV
*Parameter
) const {
2017 // Normalize the SCEV to get the representing element for an invariant load.
2018 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2019 return ParameterIds
.lookup(Parameter
);
2022 isl::set
Scop::addNonEmptyDomainConstraints(isl::set C
) const {
2023 isl::set DomainContext
= getDomains().params();
2024 return C
.intersect_params(DomainContext
);
2027 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2028 return DT
.dominates(BB
, getEntry());
2031 void Scop::addUserAssumptions(
2032 AssumptionCache
&AC
, DominatorTree
&DT
, LoopInfo
&LI
,
2033 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2034 for (auto &Assumption
: AC
.assumptions()) {
2035 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2036 if (!CI
|| CI
->getNumArgOperands() != 1)
2039 bool InScop
= contains(CI
);
2040 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2043 auto *L
= LI
.getLoopFor(CI
->getParent());
2044 auto *Val
= CI
->getArgOperand(0);
2045 ParameterSetTy DetectedParams
;
2046 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2048 OptimizationRemarkAnalysis(DEBUG_TYPE
, "IgnoreUserAssumption", CI
)
2049 << "Non-affine user assumption ignored.");
2053 // Collect all newly introduced parameters.
2054 ParameterSetTy NewParams
;
2055 for (auto *Param
: DetectedParams
) {
2056 Param
= extractConstantFactor(Param
, *SE
).second
;
2057 Param
= getRepresentingInvariantLoadSCEV(Param
);
2058 if (Parameters
.count(Param
))
2060 NewParams
.insert(Param
);
2063 SmallVector
<isl_set
*, 2> ConditionSets
;
2064 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2065 BasicBlock
*BB
= InScop
? CI
->getParent() : getRegion().getEntry();
2066 auto *Dom
= InScop
? DomainMap
[BB
].copy() : Context
.copy();
2067 assert(Dom
&& "Cannot propagate a nullptr.");
2068 bool Valid
= buildConditionSets(*this, BB
, Val
, TI
, L
, Dom
,
2069 InvalidDomainMap
, ConditionSets
);
2075 isl_set
*AssumptionCtx
= nullptr;
2077 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2078 isl_set_free(ConditionSets
[0]);
2080 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2081 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2084 // Project out newly introduced parameters as they are not otherwise useful.
2085 if (!NewParams
.empty()) {
2086 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2087 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2088 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2091 if (!NewParams
.count(Param
))
2095 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2098 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "UserAssumption", CI
)
2099 << "Use user assumption: " << stringFromIslObj(AssumptionCtx
));
2100 Context
= Context
.intersect(isl::manage(AssumptionCtx
));
2104 void Scop::addUserContext() {
2105 if (UserContextStr
.empty())
2108 isl::set UserContext
= isl::set(getIslCtx(), UserContextStr
.c_str());
2109 isl::space Space
= getParamSpace();
2110 if (Space
.dim(isl::dim::param
) != UserContext
.dim(isl::dim::param
)) {
2111 std::string SpaceStr
= Space
.to_str();
2112 errs() << "Error: the context provided in -polly-context has not the same "
2113 << "number of dimensions than the computed context. Due to this "
2114 << "mismatch, the -polly-context option is ignored. Please provide "
2115 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2119 for (unsigned i
= 0; i
< Space
.dim(isl::dim::param
); i
++) {
2120 std::string NameContext
= Context
.get_dim_name(isl::dim::param
, i
);
2121 std::string NameUserContext
= UserContext
.get_dim_name(isl::dim::param
, i
);
2123 if (NameContext
!= NameUserContext
) {
2124 std::string SpaceStr
= Space
.to_str();
2125 errs() << "Error: the name of dimension " << i
2126 << " provided in -polly-context "
2127 << "is '" << NameUserContext
<< "', but the name in the computed "
2128 << "context is '" << NameContext
2129 << "'. Due to this name mismatch, "
2130 << "the -polly-context option is ignored. Please provide "
2131 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2135 UserContext
= UserContext
.set_dim_id(isl::dim::param
, i
,
2136 Space
.get_dim_id(isl::dim::param
, i
));
2139 Context
= Context
.intersect(UserContext
);
2142 void Scop::buildInvariantEquivalenceClasses() {
2143 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2145 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2146 for (LoadInst
*LInst
: RIL
) {
2147 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2149 Type
*Ty
= LInst
->getType();
2150 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2152 InvEquivClassVMap
[LInst
] = ClassRep
;
2157 InvariantEquivClasses
.emplace_back(
2158 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2162 void Scop::buildContext() {
2163 isl::space Space
= isl::space::params_alloc(getIslCtx(), 0);
2164 Context
= isl::set::universe(Space
);
2165 InvalidContext
= isl::set::empty(Space
);
2166 AssumedContext
= isl::set::universe(Space
);
2169 void Scop::addParameterBounds() {
2171 for (auto *Parameter
: Parameters
) {
2172 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2173 Context
= addRangeBoundsToSet(Context
, SRange
, PDim
++, isl::dim::param
);
2177 static std::vector
<isl::id
> getFortranArrayIds(Scop::array_range Arrays
) {
2178 std::vector
<isl::id
> OutermostSizeIds
;
2179 for (auto Array
: Arrays
) {
2180 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2181 // for its outermost dimension. Fortran arrays will have this since the
2182 // outermost dimension size can be picked up from their runtime description.
2183 // TODO: actually need to check if it has a FAD, but for now this works.
2184 if (Array
->getNumberOfDimensions() > 0) {
2185 isl::pw_aff PwAff
= Array
->getDimensionSizePw(0);
2189 isl::id Id
= PwAff
.get_dim_id(isl::dim::param
, 0);
2190 assert(!Id
.is_null() &&
2191 "Invalid Id for PwAff expression in Fortran array");
2192 OutermostSizeIds
.push_back(Id
);
2195 return OutermostSizeIds
;
2198 // The FORTRAN array size parameters are known to be non-negative.
2199 static isl::set
boundFortranArrayParams(isl::set Context
,
2200 Scop::array_range Arrays
) {
2201 std::vector
<isl::id
> OutermostSizeIds
;
2202 OutermostSizeIds
= getFortranArrayIds(Arrays
);
2204 for (isl::id Id
: OutermostSizeIds
) {
2205 int dim
= Context
.find_dim_by_id(isl::dim::param
, Id
);
2206 Context
= Context
.lower_bound_si(isl::dim::param
, dim
, 0);
2212 void Scop::realignParams() {
2213 if (PollyIgnoreParamBounds
)
2216 // Add all parameters into a common model.
2217 isl::space Space
= getFullParamSpace();
2219 // Align the parameters of all data structures to the model.
2220 Context
= Context
.align_params(Space
);
2222 // Bound the size of the fortran array dimensions.
2223 Context
= boundFortranArrayParams(Context
, arrays());
2225 // As all parameters are known add bounds to them.
2226 addParameterBounds();
2228 for (ScopStmt
&Stmt
: *this)
2229 Stmt
.realignParams();
2230 // Simplify the schedule according to the context too.
2231 Schedule
= Schedule
.gist_domain_params(getContext());
2234 static isl::set
simplifyAssumptionContext(isl::set AssumptionContext
,
2236 // If we have modeled all blocks in the SCoP that have side effects we can
2237 // simplify the context with the constraints that are needed for anything to
2238 // be executed at all. However, if we have error blocks in the SCoP we already
2239 // assumed some parameter combinations cannot occur and removed them from the
2240 // domains, thus we cannot use the remaining domain to simplify the
2242 if (!S
.hasErrorBlock()) {
2243 auto DomainParameters
= S
.getDomains().params();
2244 AssumptionContext
= AssumptionContext
.gist_params(DomainParameters
);
2247 AssumptionContext
= AssumptionContext
.gist_params(S
.getContext());
2248 return AssumptionContext
;
2251 void Scop::simplifyContexts() {
2252 // The parameter constraints of the iteration domains give us a set of
2253 // constraints that need to hold for all cases where at least a single
2254 // statement iteration is executed in the whole scop. We now simplify the
2255 // assumed context under the assumption that such constraints hold and at
2256 // least a single statement iteration is executed. For cases where no
2257 // statement instances are executed, the assumptions we have taken about
2258 // the executed code do not matter and can be changed.
2260 // WARNING: This only holds if the assumptions we have taken do not reduce
2261 // the set of statement instances that are executed. Otherwise we
2262 // may run into a case where the iteration domains suggest that
2263 // for a certain set of parameter constraints no code is executed,
2264 // but in the original program some computation would have been
2265 // performed. In such a case, modifying the run-time conditions and
2266 // possibly influencing the run-time check may cause certain scops
2267 // to not be executed.
2271 // When delinearizing the following code:
2273 // for (long i = 0; i < 100; i++)
2274 // for (long j = 0; j < m; j++)
2277 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2278 // otherwise we would access out of bound data. Now, knowing that code is
2279 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2280 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2281 InvalidContext
= InvalidContext
.align_params(getParamSpace());
2284 /// Add the minimal/maximal access in @p Set to @p User.
2286 /// @return True if more accesses should be added, false if we reached the
2287 /// maximal number of run-time checks to be generated.
2288 static bool buildMinMaxAccess(isl::set Set
,
2289 Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2290 isl::pw_multi_aff MinPMA
, MaxPMA
;
2291 isl::pw_aff LastDimAff
;
2295 Set
= Set
.remove_divs();
2296 polly::simplify(Set
);
2298 if (Set
.n_basic_set() > RunTimeChecksMaxAccessDisjuncts
)
2299 Set
= Set
.simple_hull();
2301 // Restrict the number of parameters involved in the access as the lexmin/
2302 // lexmax computation will take too long if this number is high.
2304 // Experiments with a simple test case using an i7 4800MQ:
2306 // #Parameters involved | Time (in sec)
2315 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2316 unsigned InvolvedParams
= 0;
2317 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2318 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2321 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2325 MinPMA
= Set
.lexmin_pw_multi_aff();
2326 MaxPMA
= Set
.lexmax_pw_multi_aff();
2328 MinPMA
= MinPMA
.coalesce();
2329 MaxPMA
= MaxPMA
.coalesce();
2331 // Adjust the last dimension of the maximal access by one as we want to
2332 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2333 // we test during code generation might now point after the end of the
2334 // allocated array but we will never dereference it anyway.
2335 assert((!MaxPMA
|| MaxPMA
.dim(isl::dim::out
)) &&
2336 "Assumed at least one output dimension");
2338 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2339 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2340 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2341 OneAff
= OneAff
.add_constant_si(1);
2342 LastDimAff
= LastDimAff
.add(OneAff
);
2343 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2345 if (!MinPMA
|| !MaxPMA
)
2348 MinMaxAccesses
.push_back(std::make_pair(MinPMA
, MaxPMA
));
2353 static isl::set
getAccessDomain(MemoryAccess
*MA
) {
2354 isl::set Domain
= MA
->getStatement()->getDomain();
2355 Domain
= Domain
.project_out(isl::dim::set
, 0, Domain
.n_dim());
2356 return Domain
.reset_tuple_id();
2359 /// Wrapper function to calculate minimal/maximal accesses to each array.
2360 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2361 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2362 MinMaxAccesses
.reserve(AliasGroup
.size());
2364 isl::union_set Domains
= S
.getDomains();
2365 isl::union_map Accesses
= isl::union_map::empty(S
.getParamSpace());
2367 for (MemoryAccess
*MA
: AliasGroup
)
2368 Accesses
= Accesses
.add_map(MA
->getAccessRelation());
2370 Accesses
= Accesses
.intersect_domain(Domains
);
2371 isl::union_set Locations
= Accesses
.range();
2373 bool LimitReached
= false;
2374 for (isl::set Set
: Locations
.get_set_list()) {
2375 LimitReached
|= !buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2380 return !LimitReached
;
2383 /// Helper to treat non-affine regions and basic blocks the same.
2387 /// Return the block that is the representing block for @p RN.
2388 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2389 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2390 : RN
->getNodeAs
<BasicBlock
>();
2393 /// Return the @p idx'th block that is executed after @p RN.
2394 static inline BasicBlock
*
2395 getRegionNodeSuccessor(RegionNode
*RN
, Instruction
*TI
, unsigned idx
) {
2396 if (RN
->isSubRegion()) {
2398 return RN
->getNodeAs
<Region
>()->getExit();
2400 return TI
->getSuccessor(idx
);
2403 /// Return the smallest loop surrounding @p RN.
2404 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2405 if (!RN
->isSubRegion()) {
2406 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2407 Loop
*L
= LI
.getLoopFor(BB
);
2409 // Unreachable statements are not considered to belong to a LLVM loop, as
2410 // they are not part of an actual loop in the control flow graph.
2411 // Nevertheless, we handle certain unreachable statements that are common
2412 // when modeling run-time bounds checks as being part of the loop to be
2413 // able to model them and to later eliminate the run-time bounds checks.
2415 // Specifically, for basic blocks that terminate in an unreachable and
2416 // where the immediate predecessor is part of a loop, we assume these
2417 // basic blocks belong to the loop the predecessor belongs to. This
2418 // allows us to model the following code.
2420 // for (i = 0; i < N; i++) {
2422 // abort(); <- this abort might be translated to an
2427 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2428 L
= LI
.getLoopFor(BB
->getPrevNode());
2432 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2433 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2434 while (L
&& NonAffineSubRegion
->contains(L
))
2435 L
= L
->getParentLoop();
2439 /// Get the number of blocks in @p L.
2441 /// The number of blocks in a loop are the number of basic blocks actually
2442 /// belonging to the loop, as well as all single basic blocks that the loop
2443 /// exits to and which terminate in an unreachable instruction. We do not
2444 /// allow such basic blocks in the exit of a scop, hence they belong to the
2445 /// scop and represent run-time conditions which we want to model and
2446 /// subsequently speculate away.
2448 /// @see getRegionNodeLoop for additional details.
2449 unsigned getNumBlocksInLoop(Loop
*L
) {
2450 unsigned NumBlocks
= L
->getNumBlocks();
2451 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
2452 L
->getExitBlocks(ExitBlocks
);
2454 for (auto ExitBlock
: ExitBlocks
) {
2455 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
2461 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2462 if (!RN
->isSubRegion())
2465 Region
*R
= RN
->getNodeAs
<Region
>();
2466 return std::distance(R
->block_begin(), R
->block_end());
2469 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2470 const DominatorTree
&DT
) {
2471 if (!RN
->isSubRegion())
2472 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2473 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2474 if (isErrorBlock(*BB
, R
, LI
, DT
))
2481 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2482 return getDomainConditions(Stmt
->getEntryBlock());
2485 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2486 auto DIt
= DomainMap
.find(BB
);
2487 if (DIt
!= DomainMap
.end())
2488 return DIt
->getSecond();
2490 auto &RI
= *R
.getRegionInfo();
2491 auto *BBR
= RI
.getRegionFor(BB
);
2492 while (BBR
->getEntry() == BB
)
2493 BBR
= BBR
->getParent();
2494 return getDomainConditions(BBR
->getEntry());
2497 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2498 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2499 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2500 auto *EntryBB
= R
->getEntry();
2501 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2502 int LD
= getRelativeLoopDepth(L
);
2503 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD
+ 1));
2506 L
= L
->getParentLoop();
2509 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2510 DomainMap
[EntryBB
] = isl::manage(S
);
2512 if (IsOnlyNonAffineRegion
)
2513 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2515 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2518 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2521 // Error blocks and blocks dominated by them have been assumed to never be
2522 // executed. Representing them in the Scop does not add any value. In fact,
2523 // it is likely to cause issues during construction of the ScopStmts. The
2524 // contents of error blocks have not been verified to be expressible and
2525 // will cause problems when building up a ScopStmt for them.
2526 // Furthermore, basic blocks dominated by error blocks may reference
2527 // instructions in the error block which, if the error block is not modeled,
2528 // can themselves not be constructed properly. To this end we will replace
2529 // the domains of error blocks and those only reachable via error blocks
2530 // with an empty set. Additionally, we will record for each block under which
2531 // parameter combination it would be reached via an error block in its
2532 // InvalidDomain. This information is needed during load hoisting.
2533 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
2539 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2540 /// to be compatible to domains constructed for loop @p NewL.
2542 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2543 /// edge from @p OldL to @p NewL.
2544 static isl::set
adjustDomainDimensions(Scop
&S
, isl::set Dom
, Loop
*OldL
,
2546 // If the loops are the same there is nothing to do.
2550 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2551 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2552 // If both loops are non-affine loops there is nothing to do.
2553 if (OldDepth
== -1 && NewDepth
== -1)
2556 // Distinguish three cases:
2557 // 1) The depth is the same but the loops are not.
2558 // => One loop was left one was entered.
2559 // 2) The depth increased from OldL to NewL.
2560 // => One loop was entered, none was left.
2561 // 3) The depth decreased from OldL to NewL.
2562 // => Loops were left were difference of the depths defines how many.
2563 if (OldDepth
== NewDepth
) {
2564 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2565 Dom
= Dom
.project_out(isl::dim::set
, NewDepth
, 1);
2566 Dom
= Dom
.add_dims(isl::dim::set
, 1);
2567 } else if (OldDepth
< NewDepth
) {
2568 assert(OldDepth
+ 1 == NewDepth
);
2569 auto &R
= S
.getRegion();
2571 assert(NewL
->getParentLoop() == OldL
||
2572 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2573 Dom
= Dom
.add_dims(isl::dim::set
, 1);
2575 assert(OldDepth
> NewDepth
);
2576 int Diff
= OldDepth
- NewDepth
;
2577 int NumDim
= Dom
.n_dim();
2578 assert(NumDim
>= Diff
);
2579 Dom
= Dom
.project_out(isl::dim::set
, NumDim
- Diff
, Diff
);
2585 bool Scop::propagateInvalidStmtDomains(
2586 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2587 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2588 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2589 for (auto *RN
: RTraversal
) {
2591 // Recurse for affine subregions but go on for basic blocks and non-affine
2593 if (RN
->isSubRegion()) {
2594 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2595 if (!isNonAffineSubRegion(SubRegion
)) {
2596 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2601 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2602 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2603 isl::set
&Domain
= DomainMap
[BB
];
2604 assert(Domain
&& "Cannot propagate a nullptr");
2606 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2608 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2610 if (!IsInvalidBlock
) {
2611 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2613 InvalidDomain
= Domain
;
2614 isl::set DomPar
= Domain
.params();
2615 recordAssumption(ERRORBLOCK
, DomPar
, BB
->getTerminator()->getDebugLoc(),
2617 Domain
= isl::set::empty(Domain
.get_space());
2620 if (InvalidDomain
.is_empty()) {
2621 InvalidDomainMap
[BB
] = InvalidDomain
;
2625 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2626 auto *TI
= BB
->getTerminator();
2627 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2628 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2629 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2631 // Skip successors outside the SCoP.
2632 if (!contains(SuccBB
))
2636 if (DT
.dominates(SuccBB
, BB
))
2639 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2641 auto AdjustedInvalidDomain
=
2642 adjustDomainDimensions(*this, InvalidDomain
, BBLoop
, SuccBBLoop
);
2644 isl::set SuccInvalidDomain
= InvalidDomainMap
[SuccBB
];
2645 SuccInvalidDomain
= SuccInvalidDomain
.unite(AdjustedInvalidDomain
);
2646 SuccInvalidDomain
= SuccInvalidDomain
.coalesce();
2647 unsigned NumConjucts
= SuccInvalidDomain
.n_basic_set();
2649 InvalidDomainMap
[SuccBB
] = SuccInvalidDomain
;
2651 // Check if the maximal number of domain disjunctions was reached.
2652 // In case this happens we will bail.
2653 if (NumConjucts
< MaxDisjunctsInDomain
)
2656 InvalidDomainMap
.erase(BB
);
2657 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2661 InvalidDomainMap
[BB
] = InvalidDomain
;
2667 void Scop::propagateDomainConstraintsToRegionExit(
2668 BasicBlock
*BB
, Loop
*BBLoop
,
2669 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2670 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2671 // Check if the block @p BB is the entry of a region. If so we propagate it's
2672 // domain to the exit block of the region. Otherwise we are done.
2673 auto *RI
= R
.getRegionInfo();
2674 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2675 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2676 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2679 // Do not propagate the domain if there is a loop backedge inside the region
2680 // that would prevent the exit block from being executed.
2682 while (L
&& contains(L
)) {
2683 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2684 BBLoop
->getLoopLatches(LatchBBs
);
2685 for (auto *LatchBB
: LatchBBs
)
2686 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2688 L
= L
->getParentLoop();
2691 isl::set Domain
= DomainMap
[BB
];
2692 assert(Domain
&& "Cannot propagate a nullptr");
2694 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2696 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2697 // adjust the domain before we can propagate it.
2698 isl::set AdjustedDomain
=
2699 adjustDomainDimensions(*this, Domain
, BBLoop
, ExitBBLoop
);
2700 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2702 // If the exit domain is not yet created we set it otherwise we "add" the
2704 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2706 // Initialize the invalid domain.
2707 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2709 FinishedExitBlocks
.insert(ExitBB
);
2712 bool Scop::buildDomainsWithBranchConstraints(
2713 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2714 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2715 // To create the domain for each block in R we iterate over all blocks and
2716 // subregions in R and propagate the conditions under which the current region
2717 // element is executed. To this end we iterate in reverse post order over R as
2718 // it ensures that we first visit all predecessors of a region node (either a
2719 // basic block or a subregion) before we visit the region node itself.
2720 // Initially, only the domain for the SCoP region entry block is set and from
2721 // there we propagate the current domain to all successors, however we add the
2722 // condition that the successor is actually executed next.
2723 // As we are only interested in non-loop carried constraints here we can
2724 // simply skip loop back edges.
2726 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2727 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2728 for (auto *RN
: RTraversal
) {
2729 // Recurse for affine subregions but go on for basic blocks and non-affine
2731 if (RN
->isSubRegion()) {
2732 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2733 if (!isNonAffineSubRegion(SubRegion
)) {
2734 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
2741 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
2742 HasErrorBlock
= true;
2744 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2745 Instruction
*TI
= BB
->getTerminator();
2747 if (isa
<UnreachableInst
>(TI
))
2750 isl::set Domain
= DomainMap
.lookup(BB
);
2753 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
2755 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2756 // Propagate the domain from BB directly to blocks that have a superset
2757 // domain, at the moment only region exit nodes of regions that start in BB.
2758 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
2761 // If all successors of BB have been set a domain through the propagation
2762 // above we do not need to build condition sets but can just skip this
2763 // block. However, it is important to note that this is a local property
2764 // with regards to the region @p R. To this end FinishedExitBlocks is a
2766 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
2767 return FinishedExitBlocks
.count(SuccBB
);
2769 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
2772 // Build the condition sets for the successor nodes of the current region
2773 // node. If it is a non-affine subregion we will always execute the single
2774 // exit node, hence the single entry node domain is the condition set. For
2775 // basic blocks we use the helper function buildConditionSets.
2776 SmallVector
<isl_set
*, 8> ConditionSets
;
2777 if (RN
->isSubRegion())
2778 ConditionSets
.push_back(Domain
.copy());
2779 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
2780 InvalidDomainMap
, ConditionSets
))
2783 // Now iterate over the successors and set their initial domain based on
2784 // their condition set. We skip back edges here and have to be careful when
2785 // we leave a loop not to keep constraints over a dimension that doesn't
2787 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
2788 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
2789 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
2790 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2792 // Skip blocks outside the region.
2793 if (!contains(SuccBB
))
2796 // If we propagate the domain of some block to "SuccBB" we do not have to
2797 // adjust the domain.
2798 if (FinishedExitBlocks
.count(SuccBB
))
2802 if (DT
.dominates(SuccBB
, BB
))
2805 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2807 CondSet
= adjustDomainDimensions(*this, CondSet
, BBLoop
, SuccBBLoop
);
2809 // Set the domain for the successor or merge it with an existing domain in
2810 // case there are multiple paths (without loop back edges) to the
2812 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
2815 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
2817 // Initialize the invalid domain.
2818 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
2819 SuccDomain
= CondSet
;
2822 SuccDomain
= SuccDomain
.detect_equalities();
2824 // Check if the maximal number of domain disjunctions was reached.
2825 // In case this happens we will clean up and bail.
2826 if (SuccDomain
.n_basic_set() < MaxDisjunctsInDomain
)
2829 invalidate(COMPLEXITY
, DebugLoc());
2830 while (++u
< ConditionSets
.size())
2831 isl_set_free(ConditionSets
[u
]);
2839 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
2842 // If @p BB is the ScopEntry we are done
2843 if (R
.getEntry() == BB
)
2844 return isl::set::universe(Domain
.get_space());
2846 // The region info of this function.
2847 auto &RI
= *R
.getRegionInfo();
2849 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
2851 // A domain to collect all predecessor domains, thus all conditions under
2852 // which the block is executed. To this end we start with the empty domain.
2853 isl::set PredDom
= isl::set::empty(Domain
.get_space());
2855 // Set of regions of which the entry block domain has been propagated to BB.
2856 // all predecessors inside any of the regions can be skipped.
2857 SmallSet
<Region
*, 8> PropagatedRegions
;
2859 for (auto *PredBB
: predecessors(BB
)) {
2861 if (DT
.dominates(BB
, PredBB
))
2864 // If the predecessor is in a region we used for propagation we can skip it.
2865 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
2866 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
2871 // Check if there is a valid region we can use for propagation, thus look
2872 // for a region that contains the predecessor and has @p BB as exit block.
2873 auto *PredR
= RI
.getRegionFor(PredBB
);
2874 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
2877 // If a valid region for propagation was found use the entry of that region
2878 // for propagation, otherwise the PredBB directly.
2879 if (PredR
->getExit() == BB
) {
2880 PredBB
= PredR
->getEntry();
2881 PropagatedRegions
.insert(PredR
);
2884 isl::set PredBBDom
= getDomainConditions(PredBB
);
2885 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
2886 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
2887 PredDom
= PredDom
.unite(PredBBDom
);
2893 bool Scop::propagateDomainConstraints(
2894 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2895 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2896 // Iterate over the region R and propagate the domain constrains from the
2897 // predecessors to the current node. In contrast to the
2898 // buildDomainsWithBranchConstraints function, this one will pull the domain
2899 // information from the predecessors instead of pushing it to the successors.
2900 // Additionally, we assume the domains to be already present in the domain
2901 // map here. However, we iterate again in reverse post order so we know all
2902 // predecessors have been visited before a block or non-affine subregion is
2905 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2906 for (auto *RN
: RTraversal
) {
2907 // Recurse for affine subregions but go on for basic blocks and non-affine
2909 if (RN
->isSubRegion()) {
2910 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2911 if (!isNonAffineSubRegion(SubRegion
)) {
2912 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
2918 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2919 isl::set
&Domain
= DomainMap
[BB
];
2922 // Under the union of all predecessor conditions we can reach this block.
2923 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
2924 Domain
= Domain
.intersect(PredDom
).coalesce();
2925 Domain
= Domain
.align_params(getParamSpace());
2927 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
2928 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
2929 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
2936 /// Create a map to map from a given iteration to a subsequent iteration.
2938 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
2939 /// is incremented by one and all other dimensions are equal, e.g.,
2940 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
2942 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
2943 static isl::map
createNextIterationMap(isl::space SetSpace
, unsigned Dim
) {
2944 isl::space MapSpace
= SetSpace
.map_from_set();
2945 isl::map NextIterationMap
= isl::map::universe(MapSpace
);
2946 for (unsigned u
= 0; u
< NextIterationMap
.dim(isl::dim::in
); u
++)
2949 NextIterationMap
.equate(isl::dim::in
, u
, isl::dim::out
, u
);
2951 isl::constraint::alloc_equality(isl::local_space(MapSpace
));
2952 C
= C
.set_constant_si(1);
2953 C
= C
.set_coefficient_si(isl::dim::in
, Dim
, 1);
2954 C
= C
.set_coefficient_si(isl::dim::out
, Dim
, -1);
2955 NextIterationMap
= NextIterationMap
.add_constraint(C
);
2956 return NextIterationMap
;
2959 bool Scop::addLoopBoundsToHeaderDomain(
2960 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2961 int LoopDepth
= getRelativeLoopDepth(L
);
2962 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
2964 BasicBlock
*HeaderBB
= L
->getHeader();
2965 assert(DomainMap
.count(HeaderBB
));
2966 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
2968 isl::map NextIterationMap
=
2969 createNextIterationMap(HeaderBBDom
.get_space(), LoopDepth
);
2971 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
2973 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
2974 L
->getLoopLatches(LatchBlocks
);
2976 for (BasicBlock
*LatchBB
: LatchBlocks
) {
2977 // If the latch is only reachable via error statements we skip it.
2978 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
2982 isl::set BackedgeCondition
= nullptr;
2984 Instruction
*TI
= LatchBB
->getTerminator();
2985 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
2986 assert(BI
&& "Only branch instructions allowed in loop latches");
2988 if (BI
->isUnconditional())
2989 BackedgeCondition
= LatchBBDom
;
2991 SmallVector
<isl_set
*, 8> ConditionSets
;
2992 int idx
= BI
->getSuccessor(0) != HeaderBB
;
2993 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
2994 InvalidDomainMap
, ConditionSets
))
2997 // Free the non back edge condition set as we do not need it.
2998 isl_set_free(ConditionSets
[1 - idx
]);
3000 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
3003 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3004 assert(LatchLoopDepth
>= LoopDepth
);
3005 BackedgeCondition
= BackedgeCondition
.project_out(
3006 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3007 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
3010 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3011 for (int i
= 0; i
< LoopDepth
; i
++)
3012 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3014 isl::set UnionBackedgeConditionComplement
=
3015 UnionBackedgeCondition
.complement();
3016 UnionBackedgeConditionComplement
=
3017 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3019 UnionBackedgeConditionComplement
=
3020 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3021 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3022 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
3024 auto Parts
= partitionSetParts(HeaderBBDom
, LoopDepth
);
3025 HeaderBBDom
= Parts
.second
;
3027 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3028 // the bounded assumptions to the context as they are already implied by the
3030 if (Affinator
.hasNSWAddRecForLoop(L
))
3033 isl::set UnboundedCtx
= Parts
.first
.params();
3034 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3035 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
3039 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3040 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3042 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3043 if (!PointerBaseInst
)
3046 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3050 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
3053 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3054 isl::union_map Writes
) {
3055 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3056 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3059 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3060 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3061 if (!isa
<LoadInst
>(BasePtrInst
))
3062 return contains(BasePtrInst
);
3067 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3068 if (!PollyUseRuntimeAliasChecks
)
3071 if (buildAliasGroups(AA
)) {
3072 // Aliasing assumptions do not go through addAssumption but we still want to
3073 // collect statistics so we do it here explicitly.
3074 if (MinMaxAliasGroups
.size())
3075 AssumptionsAliasing
++;
3079 // If a problem occurs while building the alias groups we need to delete
3080 // this SCoP and pretend it wasn't valid in the first place. To this end
3081 // we make the assumed context infeasible.
3082 invalidate(ALIASING
, DebugLoc());
3085 dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3086 << " could not be created as the number of parameters involved "
3087 "is too high. The SCoP will be "
3088 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3089 "the maximal number of parameters but be advised that the "
3090 "compile time might increase exponentially.\n\n");
3094 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3095 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3096 AliasSetTracker
AST(AA
);
3098 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3099 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3100 for (ScopStmt
&Stmt
: *this) {
3102 isl::set StmtDomain
= Stmt
.getDomain();
3103 bool StmtDomainEmpty
= StmtDomain
.is_empty();
3105 // Statements with an empty domain will never be executed.
3106 if (StmtDomainEmpty
)
3109 for (MemoryAccess
*MA
: Stmt
) {
3110 if (MA
->isScalarKind())
3113 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3114 MemAccInst
Acc(MA
->getAccessInstruction());
3115 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3116 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3118 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
3123 AliasGroupVectorTy AliasGroups
;
3124 for (AliasSet
&AS
: AST
) {
3125 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3129 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3132 AliasGroups
.push_back(std::move(AG
));
3135 return std::make_tuple(AliasGroups
, HasWriteAccess
);
3138 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3139 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3141 AliasGroupTy
&AG
= AliasGroups
[u
];
3142 AliasGroupTy::iterator AGI
= AG
.begin();
3143 isl::set AGDomain
= getAccessDomain(*AGI
);
3144 while (AGI
!= AG
.end()) {
3145 MemoryAccess
*MA
= *AGI
;
3146 isl::set MADomain
= getAccessDomain(MA
);
3147 if (AGDomain
.is_disjoint(MADomain
)) {
3148 NewAG
.push_back(MA
);
3149 AGI
= AG
.erase(AGI
);
3151 AGDomain
= AGDomain
.unite(MADomain
);
3155 if (NewAG
.size() > 1)
3156 AliasGroups
.push_back(std::move(NewAG
));
3160 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3161 // To create sound alias checks we perform the following steps:
3162 // o) We partition each group into read only and non read only accesses.
3163 // o) For each group with more than one base pointer we then compute minimal
3164 // and maximal accesses to each array of a group in read only and non
3165 // read only partitions separately.
3166 AliasGroupVectorTy AliasGroups
;
3167 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3169 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3171 splitAliasGroupsByDomain(AliasGroups
);
3173 for (AliasGroupTy
&AG
: AliasGroups
) {
3174 if (!hasFeasibleRuntimeContext())
3178 IslMaxOperationsGuard
MaxOpGuard(getIslCtx().get(), OptComputeOut
);
3179 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3183 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota
) {
3184 invalidate(COMPLEXITY
, DebugLoc());
3192 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3193 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3194 AliasGroupTy ReadOnlyAccesses
;
3195 AliasGroupTy ReadWriteAccesses
;
3196 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3197 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3199 if (AliasGroup
.size() < 2)
3202 for (MemoryAccess
*Access
: AliasGroup
) {
3203 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3204 Access
->getAccessInstruction())
3205 << "Possibly aliasing pointer, use restrict keyword.");
3206 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3207 if (HasWriteAccess
.count(Array
)) {
3208 ReadWriteArrays
.insert(Array
);
3209 ReadWriteAccesses
.push_back(Access
);
3211 ReadOnlyArrays
.insert(Array
);
3212 ReadOnlyAccesses
.push_back(Access
);
3216 // If there are no read-only pointers, and less than two read-write pointers,
3217 // no alias check is needed.
3218 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3221 // If there is no read-write pointer, no alias check is needed.
3222 if (ReadWriteArrays
.empty())
3225 // For non-affine accesses, no alias check can be generated as we cannot
3226 // compute a sufficiently tight lower and upper bound: bail out.
3227 for (MemoryAccess
*MA
: AliasGroup
) {
3228 if (!MA
->isAffine()) {
3229 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3230 MA
->getAccessInstruction()->getParent());
3235 // Ensure that for all memory accesses for which we generate alias checks,
3236 // their base pointers are available.
3237 for (MemoryAccess
*MA
: AliasGroup
) {
3238 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3239 addRequiredInvariantLoad(
3240 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3243 MinMaxAliasGroups
.emplace_back();
3244 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3245 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3246 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3251 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3256 // Bail out if the number of values we need to compare is too large.
3257 // This is important as the number of comparisons grows quadratically with
3258 // the number of values we need to compare.
3259 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3260 RunTimeChecksMaxArraysPerGroup
)
3264 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3272 /// Get the smallest loop that contains @p S but is not in @p S.
3273 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3274 // Start with the smallest loop containing the entry and expand that
3275 // loop until it contains all blocks in the region. If there is a loop
3276 // containing all blocks in the region check if it is itself contained
3277 // and if so take the parent loop as it will be the smallest containing
3278 // the region but not contained by it.
3279 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3281 bool AllContained
= true;
3282 for (auto *BB
: S
.blocks())
3283 AllContained
&= L
->contains(BB
);
3286 L
= L
->getParentLoop();
3289 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
3292 int Scop::NextScopID
= 0;
3294 std::string
Scop::CurrentFunc
;
3296 int Scop::getNextID(std::string ParentFunc
) {
3297 if (ParentFunc
!= CurrentFunc
) {
3298 CurrentFunc
= ParentFunc
;
3301 return NextScopID
++;
3304 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3305 DominatorTree
&DT
, ScopDetection::DetectionContext
&DC
,
3306 OptimizationRemarkEmitter
&ORE
)
3307 : IslCtx(isl_ctx_alloc(), isl_ctx_free
), SE(&ScalarEvolution
), DT(&DT
),
3308 R(R
), name(None
), HasSingleExitEdge(R
.getExitingBlock()), DC(DC
),
3309 ORE(ORE
), Affinator(this, LI
),
3310 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3311 if (IslOnErrorAbort
)
3312 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT
);
3316 Scop::~Scop() = default;
3318 void Scop::foldSizeConstantsToRight() {
3319 isl::union_set Accessed
= getAccesses().range();
3321 for (auto Array
: arrays()) {
3322 if (Array
->getNumberOfDimensions() <= 1)
3325 isl::space Space
= Array
->getSpace();
3326 Space
= Space
.align_params(Accessed
.get_space());
3328 if (!Accessed
.contains(Space
))
3331 isl::set Elements
= Accessed
.extract_set(Space
);
3332 isl::map Transform
= isl::map::universe(Array
->getSpace().map_from_set());
3334 std::vector
<int> Int
;
3335 int Dims
= Elements
.dim(isl::dim::set
);
3336 for (int i
= 0; i
< Dims
; i
++) {
3337 isl::set DimOnly
= isl::set(Elements
).project_out(isl::dim::set
, 0, i
);
3338 DimOnly
= DimOnly
.project_out(isl::dim::set
, 1, Dims
- i
- 1);
3339 DimOnly
= DimOnly
.lower_bound_si(isl::dim::set
, 0, 0);
3341 isl::basic_set DimHull
= DimOnly
.affine_hull();
3343 if (i
== Dims
- 1) {
3345 Transform
= Transform
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3349 if (DimHull
.dim(isl::dim::div
) == 1) {
3350 isl::aff Diff
= DimHull
.get_div(0);
3351 isl::val Val
= Diff
.get_denominator_val();
3355 auto ValAPInt
= APIntFromVal(Val
);
3356 if (ValAPInt
.isSignedIntN(32))
3357 ValInt
= ValAPInt
.getSExtValue();
3361 Int
.push_back(ValInt
);
3362 isl::constraint C
= isl::constraint::alloc_equality(
3363 isl::local_space(Transform
.get_space()));
3364 C
= C
.set_coefficient_si(isl::dim::out
, i
, ValInt
);
3365 C
= C
.set_coefficient_si(isl::dim::in
, i
, -1);
3366 Transform
= Transform
.add_constraint(C
);
3370 isl::basic_set ZeroSet
= isl::basic_set(DimHull
);
3371 ZeroSet
= ZeroSet
.fix_si(isl::dim::set
, 0, 0);
3374 if (ZeroSet
.is_equal(DimHull
)) {
3378 Int
.push_back(ValInt
);
3379 Transform
= Transform
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3382 isl::set MappedElements
= isl::map(Transform
).domain();
3383 if (!Elements
.is_subset(MappedElements
))
3386 bool CanFold
= true;
3390 unsigned NumDims
= Array
->getNumberOfDimensions();
3391 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3392 if (Int
[0] != Int
[i
] && Int
[i
])
3398 for (auto &Access
: AccessFunctions
)
3399 if (Access
->getScopArrayInfo() == Array
)
3400 Access
->setAccessRelation(
3401 Access
->getAccessRelation().apply_range(Transform
));
3403 std::vector
<const SCEV
*> Sizes
;
3404 for (unsigned i
= 0; i
< NumDims
; i
++) {
3405 auto Size
= Array
->getDimensionSize(i
);
3407 if (i
== NumDims
- 1)
3408 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3409 Sizes
.push_back(Size
);
3412 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3416 void Scop::markFortranArrays() {
3417 for (ScopStmt
&Stmt
: Stmts
) {
3418 for (MemoryAccess
*MemAcc
: Stmt
) {
3419 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3423 // TODO: const_cast-ing to edit
3424 ScopArrayInfo
*SAI
=
3425 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3426 assert(SAI
&& "memory access into a Fortran array does not "
3427 "have an associated ScopArrayInfo");
3428 SAI
->applyAndSetFAD(FAD
);
3433 void Scop::finalizeAccesses() {
3434 updateAccessDimensionality();
3435 foldSizeConstantsToRight();
3436 foldAccessRelations();
3437 assumeNoOutOfBounds();
3438 markFortranArrays();
3441 void Scop::updateAccessDimensionality() {
3442 // Check all array accesses for each base pointer and find a (virtual) element
3443 // size for the base pointer that divides all access functions.
3444 for (ScopStmt
&Stmt
: *this)
3445 for (MemoryAccess
*Access
: Stmt
) {
3446 if (!Access
->isArrayKind())
3448 ScopArrayInfo
*Array
=
3449 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3451 if (Array
->getNumberOfDimensions() != 1)
3453 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3454 const SCEV
*Subscript
= Access
->getSubscript(0);
3455 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3457 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3458 Array
->updateElementType(Ty
);
3461 for (auto &Stmt
: *this)
3462 for (auto &Access
: Stmt
)
3463 Access
->updateDimensionality();
3466 void Scop::foldAccessRelations() {
3467 for (auto &Stmt
: *this)
3468 for (auto &Access
: Stmt
)
3469 Access
->foldAccessRelation();
3472 void Scop::assumeNoOutOfBounds() {
3473 for (auto &Stmt
: *this)
3474 for (auto &Access
: Stmt
)
3475 Access
->assumeNoOutOfBound();
3478 void Scop::removeFromStmtMap(ScopStmt
&Stmt
) {
3479 for (Instruction
*Inst
: Stmt
.getInstructions())
3480 InstStmtMap
.erase(Inst
);
3482 if (Stmt
.isRegionStmt()) {
3483 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks()) {
3485 // Skip entry basic block, as its instructions are already deleted as
3486 // part of the statement's instruction list.
3487 if (BB
== Stmt
.getEntryBlock())
3489 for (Instruction
&Inst
: *BB
)
3490 InstStmtMap
.erase(&Inst
);
3493 auto StmtMapIt
= StmtMap
.find(Stmt
.getBasicBlock());
3494 if (StmtMapIt
!= StmtMap
.end())
3495 StmtMapIt
->second
.erase(std::remove(StmtMapIt
->second
.begin(),
3496 StmtMapIt
->second
.end(), &Stmt
),
3497 StmtMapIt
->second
.end());
3498 for (Instruction
*Inst
: Stmt
.getInstructions())
3499 InstStmtMap
.erase(Inst
);
3503 void Scop::removeStmts(std::function
<bool(ScopStmt
&)> ShouldDelete
,
3504 bool AfterHoisting
) {
3505 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3506 if (!ShouldDelete(*StmtIt
)) {
3511 // Start with removing all of the statement's accesses including erasing it
3512 // from all maps that are pointing to them.
3513 // Make a temporary copy because removing MAs invalidates the iterator.
3514 SmallVector
<MemoryAccess
*, 16> MAList(StmtIt
->begin(), StmtIt
->end());
3515 for (MemoryAccess
*MA
: MAList
)
3516 StmtIt
->removeSingleMemoryAccess(MA
, AfterHoisting
);
3518 removeFromStmtMap(*StmtIt
);
3519 StmtIt
= Stmts
.erase(StmtIt
);
3523 void Scop::removeStmtNotInDomainMap() {
3524 auto ShouldDelete
= [this](ScopStmt
&Stmt
) -> bool {
3525 isl::set Domain
= DomainMap
.lookup(Stmt
.getEntryBlock());
3528 return Domain
.is_empty();
3530 removeStmts(ShouldDelete
, false);
3533 void Scop::simplifySCoP(bool AfterHoisting
) {
3534 auto ShouldDelete
= [AfterHoisting
](ScopStmt
&Stmt
) -> bool {
3535 // Never delete statements that contain calls to debug functions.
3536 if (hasDebugCall(&Stmt
))
3539 bool RemoveStmt
= Stmt
.isEmpty();
3541 // Remove read only statements only after invariant load hoisting.
3542 if (!RemoveStmt
&& AfterHoisting
) {
3543 bool OnlyRead
= true;
3544 for (MemoryAccess
*MA
: Stmt
) {
3552 RemoveStmt
= OnlyRead
;
3557 removeStmts(ShouldDelete
, AfterHoisting
);
3560 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3561 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
3565 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3566 LInst
= cast
<LoadInst
>(Rep
);
3568 Type
*Ty
= LInst
->getType();
3569 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3570 for (auto &IAClass
: InvariantEquivClasses
) {
3571 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3574 auto &MAs
= IAClass
.InvariantAccesses
;
3575 for (auto *MA
: MAs
)
3576 if (MA
->getAccessInstruction() == Val
)
3583 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3584 for (const llvm::Argument
&Arg
: F
.args())
3585 if (&Arg
== maybeParam
)
3591 bool Scop::canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3592 bool MAInvalidCtxIsEmpty
,
3593 bool NonHoistableCtxIsEmpty
) {
3594 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3595 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3596 if (PollyAllowDereferenceOfAllFunctionParams
&&
3597 isAParameter(LInst
->getPointerOperand(), getFunction()))
3600 // TODO: We can provide more information for better but more expensive
3602 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3603 LInst
->getAlignment(), DL
))
3606 // If the location might be overwritten we do not hoist it unconditionally.
3608 // TODO: This is probably too conservative.
3609 if (!NonHoistableCtxIsEmpty
)
3612 // If a dereferenceable load is in a statement that is modeled precisely we
3614 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3617 // Even if the statement is not modeled precisely we can hoist the load if it
3618 // does not involve any parameters that might have been specialized by the
3619 // statement domain.
3620 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3621 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
3626 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3630 isl::set StmtInvalidCtx
= Stmt
.getInvalidContext();
3631 bool StmtInvalidCtxIsEmpty
= StmtInvalidCtx
.is_empty();
3633 // Get the context under which the statement is executed but remove the error
3634 // context under which this statement is reached.
3635 isl::set DomainCtx
= Stmt
.getDomain().params();
3636 DomainCtx
= DomainCtx
.subtract(StmtInvalidCtx
);
3638 if (DomainCtx
.n_basic_set() >= MaxDisjunctsInDomain
) {
3639 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3640 invalidate(COMPLEXITY
, AccInst
->getDebugLoc(), AccInst
->getParent());
3644 // Project out all parameters that relate to loads in the statement. Otherwise
3645 // we could have cyclic dependences on the constraints under which the
3646 // hoisted loads are executed and we could not determine an order in which to
3647 // pre-load them. This happens because not only lower bounds are part of the
3648 // domain but also upper bounds.
3649 for (auto &InvMA
: InvMAs
) {
3650 auto *MA
= InvMA
.MA
;
3651 Instruction
*AccInst
= MA
->getAccessInstruction();
3652 if (SE
->isSCEVable(AccInst
->getType())) {
3653 SetVector
<Value
*> Values
;
3654 for (const SCEV
*Parameter
: Parameters
) {
3656 findValues(Parameter
, *SE
, Values
);
3657 if (!Values
.count(AccInst
))
3660 if (isl::id ParamId
= getIdForParam(Parameter
)) {
3661 int Dim
= DomainCtx
.find_dim_by_id(isl::dim::param
, ParamId
);
3663 DomainCtx
= DomainCtx
.eliminate(isl::dim::param
, Dim
, 1);
3669 for (auto &InvMA
: InvMAs
) {
3670 auto *MA
= InvMA
.MA
;
3671 isl::set NHCtx
= InvMA
.NonHoistableCtx
;
3673 // Check for another invariant access that accesses the same location as
3674 // MA and if found consolidate them. Otherwise create a new equivalence
3675 // class at the end of InvariantEquivClasses.
3676 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3677 Type
*Ty
= LInst
->getType();
3678 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3680 isl::set MAInvalidCtx
= MA
->getInvalidContext();
3681 bool NonHoistableCtxIsEmpty
= NHCtx
.is_empty();
3682 bool MAInvalidCtxIsEmpty
= MAInvalidCtx
.is_empty();
3685 // Check if we know that this pointer can be speculatively accessed.
3686 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3687 NonHoistableCtxIsEmpty
)) {
3688 MACtx
= isl::set::universe(DomainCtx
.get_space());
3691 MACtx
= MACtx
.subtract(MAInvalidCtx
.unite(NHCtx
));
3692 MACtx
= MACtx
.gist_params(getContext());
3695 bool Consolidated
= false;
3696 for (auto &IAClass
: InvariantEquivClasses
) {
3697 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3700 // If the pointer and the type is equal check if the access function wrt.
3701 // to the domain is equal too. It can happen that the domain fixes
3702 // parameter values and these can be different for distinct part of the
3703 // SCoP. If this happens we cannot consolidate the loads but need to
3704 // create a new invariant load equivalence class.
3705 auto &MAs
= IAClass
.InvariantAccesses
;
3707 auto *LastMA
= MAs
.front();
3709 isl::set AR
= MA
->getAccessRelation().range();
3710 isl::set LastAR
= LastMA
->getAccessRelation().range();
3711 bool SameAR
= AR
.is_equal(LastAR
);
3717 // Add MA to the list of accesses that are in this class.
3720 Consolidated
= true;
3722 // Unify the execution context of the class and this statement.
3723 isl::set IAClassDomainCtx
= IAClass
.ExecutionContext
;
3724 if (IAClassDomainCtx
)
3725 IAClassDomainCtx
= IAClassDomainCtx
.unite(MACtx
).coalesce();
3727 IAClassDomainCtx
= MACtx
;
3728 IAClass
.ExecutionContext
= IAClassDomainCtx
;
3735 MACtx
= MACtx
.coalesce();
3737 // If we did not consolidate MA, thus did not find an equivalence class
3738 // for it, we create a new one.
3739 InvariantEquivClasses
.emplace_back(
3740 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList
{MA
}, MACtx
, Ty
});
3744 /// Check if an access range is too complex.
3746 /// An access range is too complex, if it contains either many disjuncts or
3747 /// very complex expressions. As a simple heuristic, we assume if a set to
3748 /// be too complex if the sum of existentially quantified dimensions and
3749 /// set dimensions is larger than a threshold. This reliably detects both
3750 /// sets with many disjuncts as well as sets with many divisions as they
3753 /// @param AccessRange The range to check for complexity.
3755 /// @returns True if the access range is too complex.
3756 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
3757 unsigned NumTotalDims
= 0;
3759 for (isl::basic_set BSet
: AccessRange
.get_basic_set_list()) {
3760 NumTotalDims
+= BSet
.dim(isl::dim::div
);
3761 NumTotalDims
+= BSet
.dim(isl::dim::set
);
3764 if (NumTotalDims
> MaxDimensionsInAccessRange
)
3770 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
3771 // TODO: Loads that are not loop carried, hence are in a statement with
3772 // zero iterators, are by construction invariant, though we
3773 // currently "hoist" them anyway. This is necessary because we allow
3774 // them to be treated as parameters (e.g., in conditions) and our code
3775 // generation would otherwise use the old value.
3777 auto &Stmt
= *Access
->getStatement();
3778 BasicBlock
*BB
= Stmt
.getEntryBlock();
3780 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
3781 Access
->isMemoryIntrinsic())
3784 // Skip accesses that have an invariant base pointer which is defined but
3785 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3786 // returns a pointer that is used as a base address. However, as we want
3787 // to hoist indirect pointers, we allow the base pointer to be defined in
3788 // the region if it is also a memory access. Each ScopArrayInfo object
3789 // that has a base pointer origin has a base pointer that is loaded and
3790 // that it is invariant, thus it will be hoisted too. However, if there is
3791 // no base pointer origin we check that the base pointer is defined
3792 // outside the region.
3793 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
3794 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
3797 isl::map AccessRelation
= Access
->getAccessRelation();
3798 assert(!AccessRelation
.is_empty());
3800 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
3803 AccessRelation
= AccessRelation
.intersect_domain(Stmt
.getDomain());
3804 isl::set SafeToLoad
;
3806 auto &DL
= getFunction().getParent()->getDataLayout();
3807 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
3809 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
3810 } else if (BB
!= LI
->getParent()) {
3811 // Skip accesses in non-affine subregions as they might not be executed
3812 // under the same condition as the entry of the non-affine subregion.
3815 SafeToLoad
= AccessRelation
.range();
3818 if (isAccessRangeTooComplex(AccessRelation
.range()))
3821 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
3822 isl::set WrittenCtx
= Written
.params();
3823 bool IsWritten
= !WrittenCtx
.is_empty();
3828 WrittenCtx
= WrittenCtx
.remove_divs();
3829 bool TooComplex
= WrittenCtx
.n_basic_set() >= MaxDisjunctsInDomain
;
3830 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
3833 addAssumption(INVARIANTLOAD
, WrittenCtx
, LI
->getDebugLoc(), AS_RESTRICTION
,
3838 void Scop::verifyInvariantLoads() {
3839 auto &RIL
= getRequiredInvariantLoads();
3840 for (LoadInst
*LI
: RIL
) {
3841 assert(LI
&& contains(LI
));
3842 // If there exists a statement in the scop which has a memory access for
3843 // @p LI, then mark this scop as infeasible for optimization.
3844 for (ScopStmt
&Stmt
: Stmts
)
3845 if (Stmt
.getArrayAccessOrNULLFor(LI
)) {
3846 invalidate(INVARIANTLOAD
, LI
->getDebugLoc(), LI
->getParent());
3852 void Scop::hoistInvariantLoads() {
3853 if (!PollyInvariantLoadHoisting
)
3856 isl::union_map Writes
= getWrites();
3857 for (ScopStmt
&Stmt
: *this) {
3858 InvariantAccessesTy InvariantAccesses
;
3860 for (MemoryAccess
*Access
: Stmt
)
3861 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
3862 InvariantAccesses
.push_back({Access
, NHCtx
});
3864 // Transfer the memory access from the statement to the SCoP.
3865 for (auto InvMA
: InvariantAccesses
)
3866 Stmt
.removeMemoryAccess(InvMA
.MA
);
3867 addInvariantLoads(Stmt
, InvariantAccesses
);
3871 /// Find the canonical scop array info object for a set of invariant load
3872 /// hoisted loads. The canonical array is the one that corresponds to the
3873 /// first load in the list of accesses which is used as base pointer of a
3875 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
3876 MemoryAccessList
&Accesses
) {
3877 for (MemoryAccess
*Access
: Accesses
) {
3878 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
3879 Access
->getAccessInstruction(), MemoryKind::Array
);
3881 return CanonicalArray
;
3886 /// Check if @p Array severs as base array in an invariant load.
3887 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
3888 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
3889 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
3890 if (Access2
->getScopArrayInfo() == Array
)
3895 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3896 /// with a reference to @p New.
3897 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
3898 const ScopArrayInfo
*New
) {
3899 for (ScopStmt
&Stmt
: *S
)
3900 for (MemoryAccess
*Access
: Stmt
) {
3901 if (Access
->getLatestScopArrayInfo() != Old
)
3904 isl::id Id
= New
->getBasePtrId();
3905 isl::map Map
= Access
->getAccessRelation();
3906 Map
= Map
.set_tuple_id(isl::dim::out
, Id
);
3907 Access
->setAccessRelation(Map
);
3911 void Scop::canonicalizeDynamicBasePtrs() {
3912 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
3913 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
3915 const ScopArrayInfo
*CanonicalBasePtrSAI
=
3916 findCanonicalArray(this, BasePtrAccesses
);
3918 if (!CanonicalBasePtrSAI
)
3921 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
3922 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
3923 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
3924 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
3925 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
3928 // we currently do not canonicalize arrays where some accesses are
3929 // hoisted as invariant loads. If we would, we need to update the access
3930 // function of the invariant loads as well. However, as this is not a
3931 // very common situation, we leave this for now to avoid further
3932 // complexity increases.
3933 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
3936 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
3941 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
3942 ArrayRef
<const SCEV
*> Sizes
,
3944 const char *BaseName
) {
3945 assert((BasePtr
|| BaseName
) &&
3946 "BasePtr and BaseName can not be nullptr at the same time.");
3947 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
3948 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
3949 : ScopArrayNameMap
[BaseName
];
3951 auto &DL
= getFunction().getParent()->getDataLayout();
3952 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
3953 DL
, this, BaseName
));
3954 ScopArrayInfoSet
.insert(SAI
.get());
3956 SAI
->updateElementType(ElementType
);
3957 // In case of mismatching array sizes, we bail out by setting the run-time
3958 // context to false.
3959 if (!SAI
->updateSizes(Sizes
))
3960 invalidate(DELINEARIZATION
, DebugLoc());
3965 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
3966 const std::string
&BaseName
,
3967 const std::vector
<unsigned> &Sizes
) {
3968 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
3969 std::vector
<const SCEV
*> SCEVSizes
;
3971 for (auto size
: Sizes
)
3973 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
3975 SCEVSizes
.push_back(nullptr);
3977 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
3978 MemoryKind::Array
, BaseName
.c_str());
3982 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
3984 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
3988 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
3989 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
3990 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
3994 std::string
Scop::getContextStr() const { return getContext().to_str(); }
3996 std::string
Scop::getAssumedContextStr() const {
3997 assert(AssumedContext
&& "Assumed context not yet built");
3998 return AssumedContext
.to_str();
4001 std::string
Scop::getInvalidContextStr() const {
4002 return InvalidContext
.to_str();
4005 std::string
Scop::getNameStr() const {
4006 std::string ExitName
, EntryName
;
4007 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4008 return EntryName
+ "---" + ExitName
;
4011 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4012 std::string ExitName
, EntryName
;
4013 raw_string_ostream
ExitStr(ExitName
);
4014 raw_string_ostream
EntryStr(EntryName
);
4016 R
.getEntry()->printAsOperand(EntryStr
, false);
4020 R
.getExit()->printAsOperand(ExitStr
, false);
4023 ExitName
= "FunctionExit";
4025 return std::make_pair(EntryName
, ExitName
);
4028 isl::set
Scop::getContext() const { return Context
; }
4029 isl::space
Scop::getParamSpace() const { return getContext().get_space(); }
4031 isl::space
Scop::getFullParamSpace() const {
4032 std::vector
<isl::id
> FortranIDs
;
4033 FortranIDs
= getFortranArrayIds(arrays());
4035 isl::space Space
= isl::space::params_alloc(
4036 getIslCtx(), ParameterIds
.size() + FortranIDs
.size());
4039 for (const SCEV
*Parameter
: Parameters
) {
4040 isl::id Id
= getIdForParam(Parameter
);
4041 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4044 for (isl::id Id
: FortranIDs
)
4045 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4050 isl::set
Scop::getAssumedContext() const {
4051 assert(AssumedContext
&& "Assumed context not yet built");
4052 return AssumedContext
;
4055 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4056 if (PollyProcessUnprofitable
)
4062 unsigned OptimizableStmtsOrLoops
= 0;
4063 for (auto &Stmt
: *this) {
4064 if (Stmt
.getNumIterators() == 0)
4067 bool ContainsArrayAccs
= false;
4068 bool ContainsScalarAccs
= false;
4069 for (auto *MA
: Stmt
) {
4072 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4073 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4076 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4077 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4080 return OptimizableStmtsOrLoops
> 1;
4083 bool Scop::hasFeasibleRuntimeContext() const {
4084 auto PositiveContext
= getAssumedContext();
4085 auto NegativeContext
= getInvalidContext();
4086 PositiveContext
= addNonEmptyDomainConstraints(PositiveContext
);
4087 // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
4088 if (!PositiveContext
)
4091 bool IsFeasible
= !(PositiveContext
.is_empty() ||
4092 PositiveContext
.is_subset(NegativeContext
));
4096 auto DomainContext
= getDomains().params();
4097 IsFeasible
= !DomainContext
.is_subset(NegativeContext
);
4098 IsFeasible
&= !Context
.is_subset(NegativeContext
);
4103 static std::string
toString(AssumptionKind Kind
) {
4106 return "No-aliasing";
4110 return "No-overflows";
4112 return "Signed-unsigned";
4114 return "Low complexity";
4116 return "Profitable";
4120 return "Finite loop";
4122 return "Invariant load";
4123 case DELINEARIZATION
:
4124 return "Delinearization";
4126 llvm_unreachable("Unknown AssumptionKind!");
4129 bool Scop::isEffectiveAssumption(isl::set Set
, AssumptionSign Sign
) {
4130 if (Sign
== AS_ASSUMPTION
) {
4131 if (Context
.is_subset(Set
))
4134 if (AssumedContext
.is_subset(Set
))
4137 if (Set
.is_disjoint(Context
))
4140 if (Set
.is_subset(InvalidContext
))
4146 bool Scop::trackAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4147 AssumptionSign Sign
, BasicBlock
*BB
) {
4148 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4151 // Do never emit trivial assumptions as they only clutter the output.
4152 if (!PollyRemarksMinimal
) {
4154 if (Sign
== AS_ASSUMPTION
)
4155 Univ
= isl::set::universe(Set
.get_space());
4157 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& Set
.is_empty()) ||
4158 (Sign
== AS_ASSUMPTION
&& Univ
.is_equal(Set
));
4166 AssumptionsAliasing
++;
4169 AssumptionsInbounds
++;
4172 AssumptionsWrapping
++;
4175 AssumptionsUnsigned
++;
4178 AssumptionsComplexity
++;
4181 AssumptionsUnprofitable
++;
4184 AssumptionsErrorBlock
++;
4187 AssumptionsInfiniteLoop
++;
4190 AssumptionsInvariantLoad
++;
4192 case DELINEARIZATION
:
4193 AssumptionsDelinearization
++;
4197 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4198 std::string Msg
= toString(Kind
) + Suffix
+ Set
.to_str();
4200 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
, BB
)
4203 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
,
4209 void Scop::addAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4210 AssumptionSign Sign
, BasicBlock
*BB
) {
4211 // Simplify the assumptions/restrictions first.
4212 Set
= Set
.gist_params(getContext());
4214 if (!trackAssumption(Kind
, Set
, Loc
, Sign
, BB
))
4217 if (Sign
== AS_ASSUMPTION
)
4218 AssumedContext
= AssumedContext
.intersect(Set
).coalesce();
4220 InvalidContext
= InvalidContext
.unite(Set
).coalesce();
4223 void Scop::recordAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4224 AssumptionSign Sign
, BasicBlock
*BB
) {
4225 assert((Set
.is_params() || BB
) &&
4226 "Assumptions without a basic block must be parameter sets");
4227 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4230 void Scop::addRecordedAssumptions() {
4231 while (!RecordedAssumptions
.empty()) {
4232 Assumption AS
= RecordedAssumptions
.pop_back_val();
4235 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
, nullptr /* BasicBlock */);
4239 // If the domain was deleted the assumptions are void.
4240 isl_set
*Dom
= getDomainConditions(AS
.BB
).release();
4244 // If a basic block was given use its domain to simplify the assumption.
4245 // In case of restrictions we know they only have to hold on the domain,
4246 // thus we can intersect them with the domain of the block. However, for
4247 // assumptions the domain has to imply them, thus:
4249 // Dom => S <==> A v B <==> A - B
4251 // To avoid the complement we will register A - B as a restriction not an
4253 isl_set
*S
= AS
.Set
.copy();
4254 if (AS
.Sign
== AS_RESTRICTION
)
4255 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4256 else /* (AS.Sign == AS_ASSUMPTION) */
4257 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4259 addAssumption(AS
.Kind
, isl::manage(S
), AS
.Loc
, AS_RESTRICTION
, AS
.BB
);
4263 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
, BasicBlock
*BB
) {
4264 LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind
<< "\n");
4265 addAssumption(Kind
, isl::set::empty(getParamSpace()), Loc
, AS_ASSUMPTION
, BB
);
4268 isl::set
Scop::getInvalidContext() const { return InvalidContext
; }
4270 void Scop::printContext(raw_ostream
&OS
) const {
4272 OS
.indent(4) << Context
<< "\n";
4274 OS
.indent(4) << "Assumed Context:\n";
4275 OS
.indent(4) << AssumedContext
<< "\n";
4277 OS
.indent(4) << "Invalid Context:\n";
4278 OS
.indent(4) << InvalidContext
<< "\n";
4281 for (const SCEV
*Parameter
: Parameters
)
4282 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4285 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4287 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4288 if (Pair
.second
.size() == 0)
4291 noOfGroups
+= Pair
.second
.size();
4294 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4295 if (MinMaxAliasGroups
.empty()) {
4296 OS
.indent(8) << "n/a\n";
4300 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4302 // If the group has no read only accesses print the write accesses.
4303 if (Pair
.second
.empty()) {
4304 OS
.indent(8) << "[[";
4305 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4306 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4312 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4313 OS
.indent(8) << "[[";
4314 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4315 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4316 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4324 void Scop::printStatements(raw_ostream
&OS
, bool PrintInstructions
) const {
4325 OS
<< "Statements {\n";
4327 for (const ScopStmt
&Stmt
: *this) {
4329 Stmt
.print(OS
, PrintInstructions
);
4332 OS
.indent(4) << "}\n";
4335 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4338 for (auto &Array
: arrays())
4341 OS
.indent(4) << "}\n";
4343 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4345 for (auto &Array
: arrays())
4346 Array
->print(OS
, /* SizeAsPwAff */ true);
4348 OS
.indent(4) << "}\n";
4351 void Scop::print(raw_ostream
&OS
, bool PrintInstructions
) const {
4352 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4353 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4354 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4355 OS
.indent(4) << "Invariant Accesses: {\n";
4356 for (const auto &IAClass
: InvariantEquivClasses
) {
4357 const auto &MAs
= IAClass
.InvariantAccesses
;
4359 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4361 MAs
.front()->print(OS
);
4362 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4366 OS
.indent(4) << "}\n";
4367 printContext(OS
.indent(4));
4368 printArrayInfo(OS
.indent(4));
4369 printAliasAssumptions(OS
);
4370 printStatements(OS
.indent(4), PrintInstructions
);
4373 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4374 LLVM_DUMP_METHOD
void Scop::dump() const { print(dbgs(), true); }
4377 isl::ctx
Scop::getIslCtx() const { return IslCtx
.get(); }
4379 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4381 // First try to use the SCEVAffinator to generate a piecewise defined
4382 // affine function from @p E in the context of @p BB. If that tasks becomes to
4383 // complex the affinator might return a nullptr. In such a case we invalidate
4384 // the SCoP and return a dummy value. This way we do not need to add error
4385 // handling code to all users of this function.
4386 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4388 // TODO: We could use a heuristic and either use:
4389 // SCEVAffinator::takeNonNegativeAssumption
4391 // SCEVAffinator::interpretAsUnsigned
4392 // to deal with unsigned or "NonNegative" SCEVs.
4394 Affinator
.takeNonNegativeAssumption(PWAC
);
4398 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4399 invalidate(COMPLEXITY
, DL
, BB
);
4400 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4403 isl::union_set
Scop::getDomains() const {
4404 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx().get(), 0);
4405 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4407 for (const ScopStmt
&Stmt
: *this)
4408 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4410 return isl::manage(Domain
);
4413 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4414 PWACtx PWAC
= getPwAff(E
, BB
);
4419 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4420 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4422 for (ScopStmt
&Stmt
: *this) {
4423 for (MemoryAccess
*MA
: Stmt
) {
4424 if (!Predicate(*MA
))
4427 isl::set Domain
= Stmt
.getDomain();
4428 isl::map AccessDomain
= MA
->getAccessRelation();
4429 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4430 Accesses
= Accesses
.add_map(AccessDomain
);
4434 return Accesses
.coalesce();
4437 isl::union_map
Scop::getMustWrites() {
4438 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4441 isl::union_map
Scop::getMayWrites() {
4442 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4445 isl::union_map
Scop::getWrites() {
4446 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4449 isl::union_map
Scop::getReads() {
4450 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4453 isl::union_map
Scop::getAccesses() {
4454 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4457 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4458 return getAccessesOfType(
4459 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4462 // Check whether @p Node is an extension node.
4464 // @return true if @p Node is an extension node.
4465 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4466 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4467 return isl_bool_error
;
4469 return isl_bool_true
;
4472 bool Scop::containsExtensionNode(isl::schedule Schedule
) {
4473 return isl_schedule_foreach_schedule_node_top_down(
4474 Schedule
.get(), isNotExtNode
, nullptr) == isl_stat_error
;
4477 isl::union_map
Scop::getSchedule() const {
4478 auto Tree
= getScheduleTree();
4479 if (containsExtensionNode(Tree
))
4482 return Tree
.get_map();
4485 isl::schedule
Scop::getScheduleTree() const {
4486 return Schedule
.intersect_domain(getDomains());
4489 void Scop::setSchedule(isl::union_map NewSchedule
) {
4490 auto S
= isl::schedule::from_domain(getDomains());
4491 Schedule
= S
.insert_partial_schedule(
4492 isl::multi_union_pw_aff::from_union_map(NewSchedule
));
4493 ScheduleModified
= true;
4496 void Scop::setScheduleTree(isl::schedule NewSchedule
) {
4497 Schedule
= NewSchedule
;
4498 ScheduleModified
= true;
4501 bool Scop::restrictDomains(isl::union_set Domain
) {
4502 bool Changed
= false;
4503 for (ScopStmt
&Stmt
: *this) {
4504 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4505 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4507 if (StmtDomain
.is_subset(NewStmtDomain
))
4512 NewStmtDomain
= NewStmtDomain
.coalesce();
4514 if (NewStmtDomain
.is_empty())
4515 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4517 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
4522 ScalarEvolution
*Scop::getSE() const { return SE
; }
4524 // Create an isl_multi_union_aff that defines an identity mapping from the
4525 // elements of USet to their N-th dimension.
4529 // Domain: { A[i,j]; B[i,j,k] }
4532 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4534 // @param USet A union set describing the elements for which to generate a
4536 // @param N The dimension to map to.
4537 // @returns A mapping from USet to its N-th dimension.
4538 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4541 assert(!USet
.is_empty());
4543 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4545 for (isl::set S
: USet
.get_set_list()) {
4546 int Dim
= S
.dim(isl::dim::set
);
4547 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4550 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4552 Result
= Result
.add_pw_multi_aff(PMA
);
4555 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4558 void Scop::addScopStmt(BasicBlock
*BB
, StringRef Name
, Loop
*SurroundingLoop
,
4559 std::vector
<Instruction
*> Instructions
) {
4560 assert(BB
&& "Unexpected nullptr!");
4561 Stmts
.emplace_back(*this, *BB
, Name
, SurroundingLoop
, Instructions
);
4562 auto *Stmt
= &Stmts
.back();
4563 StmtMap
[BB
].push_back(Stmt
);
4564 for (Instruction
*Inst
: Instructions
) {
4565 assert(!InstStmtMap
.count(Inst
) &&
4566 "Unexpected statement corresponding to the instruction.");
4567 InstStmtMap
[Inst
] = Stmt
;
4571 void Scop::addScopStmt(Region
*R
, StringRef Name
, Loop
*SurroundingLoop
,
4572 std::vector
<Instruction
*> Instructions
) {
4573 assert(R
&& "Unexpected nullptr!");
4574 Stmts
.emplace_back(*this, *R
, Name
, SurroundingLoop
, Instructions
);
4575 auto *Stmt
= &Stmts
.back();
4577 for (Instruction
*Inst
: Instructions
) {
4578 assert(!InstStmtMap
.count(Inst
) &&
4579 "Unexpected statement corresponding to the instruction.");
4580 InstStmtMap
[Inst
] = Stmt
;
4583 for (BasicBlock
*BB
: R
->blocks()) {
4584 StmtMap
[BB
].push_back(Stmt
);
4585 if (BB
== R
->getEntry())
4587 for (Instruction
&Inst
: *BB
) {
4588 assert(!InstStmtMap
.count(&Inst
) &&
4589 "Unexpected statement corresponding to the instruction.");
4590 InstStmtMap
[&Inst
] = Stmt
;
4595 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4598 isl::set SourceDomain
= SourceRel
.domain();
4599 isl::set TargetDomain
= TargetRel
.domain();
4600 assert(Domain
.is_subset(TargetDomain
) &&
4601 "Target access not defined for complete statement domain");
4602 assert(Domain
.is_subset(SourceDomain
) &&
4603 "Source access not defined for complete statement domain");
4605 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4607 return &(Stmts
.back());
4610 void Scop::buildSchedule(LoopInfo
&LI
) {
4611 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4612 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4613 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4614 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4615 Schedule
= LoopStack
[0].Schedule
;
4618 /// To generate a schedule for the elements in a Region we traverse the Region
4619 /// in reverse-post-order and add the contained RegionNodes in traversal order
4620 /// to the schedule of the loop that is currently at the top of the LoopStack.
4621 /// For loop-free codes, this results in a correct sequential ordering.
4632 /// Including loops requires additional processing. Whenever a loop header is
4633 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4634 /// from an empty schedule, we first process all RegionNodes that are within
4635 /// this loop and complete the sequential schedule at this loop-level before
4636 /// processing about any other nodes. To implement this
4637 /// loop-nodes-first-processing, the reverse post-order traversal is
4638 /// insufficient. Hence, we additionally check if the traversal yields
4639 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4640 /// These region-nodes are then queue and only traverse after the all nodes
4641 /// within the current loop have been processed.
4642 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4643 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4645 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4646 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4647 std::deque
<RegionNode
*> DelayList
;
4648 bool LastRNWaiting
= false;
4650 // Iterate over the region @p R in reverse post-order but queue
4651 // sub-regions/blocks iff they are not part of the last encountered but not
4652 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4653 // that we queued the last sub-region/block from the reverse post-order
4654 // iterator. If it is set we have to explore the next sub-region/block from
4655 // the iterator (if any) to guarantee progress. If it is not set we first try
4656 // the next queued sub-region/blocks.
4657 while (!WorkList
.empty() || !DelayList
.empty()) {
4660 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4661 RN
= WorkList
.front();
4662 WorkList
.pop_front();
4663 LastRNWaiting
= false;
4665 RN
= DelayList
.front();
4666 DelayList
.pop_front();
4669 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4673 Loop
*LastLoop
= LoopStack
.back().L
;
4674 if (LastLoop
!= L
) {
4675 if (LastLoop
&& !LastLoop
->contains(L
)) {
4676 LastRNWaiting
= true;
4677 DelayList
.push_back(RN
);
4680 LoopStack
.push_back({L
, nullptr, 0});
4682 buildSchedule(RN
, LoopStack
, LI
);
4686 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4687 if (RN
->isSubRegion()) {
4688 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
4689 if (!isNonAffineSubRegion(LocalRegion
)) {
4690 buildSchedule(LocalRegion
, LoopStack
, LI
);
4695 assert(LoopStack
.rbegin() != LoopStack
.rend());
4696 auto LoopData
= LoopStack
.rbegin();
4697 LoopData
->NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
4699 for (auto *Stmt
: getStmtListFor(RN
)) {
4700 isl::union_set UDomain
{Stmt
->getDomain()};
4701 auto StmtSchedule
= isl::schedule::from_domain(UDomain
);
4702 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, StmtSchedule
);
4705 // Check if we just processed the last node in this loop. If we did, finalize
4708 // - adding new schedule dimensions
4709 // - folding the resulting schedule into the parent loop schedule
4710 // - dropping the loop schedule from the LoopStack.
4712 // Then continue to check surrounding loops, which might also have been
4713 // completed by this node.
4714 size_t Dimension
= LoopStack
.size();
4715 while (LoopData
->L
&&
4716 LoopData
->NumBlocksProcessed
== getNumBlocksInLoop(LoopData
->L
)) {
4717 isl::schedule Schedule
= LoopData
->Schedule
;
4718 auto NumBlocksProcessed
= LoopData
->NumBlocksProcessed
;
4720 assert(std::next(LoopData
) != LoopStack
.rend());
4725 isl::union_set Domain
= Schedule
.get_domain();
4726 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, Dimension
);
4727 Schedule
= Schedule
.insert_partial_schedule(MUPA
);
4728 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, Schedule
);
4731 LoopData
->NumBlocksProcessed
+= NumBlocksProcessed
;
4733 // Now pop all loops processed up there from the LoopStack
4734 LoopStack
.erase(LoopStack
.begin() + Dimension
, LoopStack
.end());
4737 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
4738 auto StmtMapIt
= StmtMap
.find(BB
);
4739 if (StmtMapIt
== StmtMap
.end())
4741 return StmtMapIt
->second
;
4744 ScopStmt
*Scop::getIncomingStmtFor(const Use
&U
) const {
4745 auto *PHI
= cast
<PHINode
>(U
.getUser());
4746 BasicBlock
*IncomingBB
= PHI
->getIncomingBlock(U
);
4748 // If the value is a non-synthesizable from the incoming block, use the
4749 // statement that contains it as user statement.
4750 if (auto *IncomingInst
= dyn_cast
<Instruction
>(U
.get())) {
4751 if (IncomingInst
->getParent() == IncomingBB
) {
4752 if (ScopStmt
*IncomingStmt
= getStmtFor(IncomingInst
))
4753 return IncomingStmt
;
4757 // Otherwise, use the epilogue/last statement.
4758 return getLastStmtFor(IncomingBB
);
4761 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
4762 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
4763 if (!StmtList
.empty())
4764 return StmtList
.back();
4768 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
4769 if (RN
->isSubRegion())
4770 return getStmtListFor(RN
->getNodeAs
<Region
>());
4771 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
4774 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
4775 return getStmtListFor(R
->getEntry());
4778 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
4779 if (!L
|| !R
.contains(L
))
4781 // outermostLoopInRegion always returns nullptr for top level regions
4782 if (R
.isTopLevelRegion()) {
4783 // LoopInfo's depths start at 1, we start at 0
4784 return L
->getLoopDepth() - 1;
4786 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
4788 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
4792 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
4793 for (auto &SAI
: arrays()) {
4794 if (SAI
->getName() == BaseName
)
4800 void Scop::addAccessData(MemoryAccess
*Access
) {
4801 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
4802 assert(SAI
&& "can only use after access relations have been constructed");
4804 if (Access
->isOriginalValueKind() && Access
->isRead())
4805 ValueUseAccs
[SAI
].push_back(Access
);
4806 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
4807 PHIIncomingAccs
[SAI
].push_back(Access
);
4810 void Scop::removeAccessData(MemoryAccess
*Access
) {
4811 if (Access
->isOriginalValueKind() && Access
->isWrite()) {
4812 ValueDefAccs
.erase(Access
->getAccessValue());
4813 } else if (Access
->isOriginalValueKind() && Access
->isRead()) {
4814 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
4815 auto NewEnd
= std::remove(Uses
.begin(), Uses
.end(), Access
);
4816 Uses
.erase(NewEnd
, Uses
.end());
4817 } else if (Access
->isOriginalPHIKind() && Access
->isRead()) {
4818 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessInstruction());
4819 PHIReadAccs
.erase(PHI
);
4820 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
4821 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
4822 auto NewEnd
= std::remove(Incomings
.begin(), Incomings
.end(), Access
);
4823 Incomings
.erase(NewEnd
, Incomings
.end());
4827 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
4828 assert(SAI
->isValueKind());
4830 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
4834 return ValueDefAccs
.lookup(Val
);
4837 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
4838 assert(SAI
->isValueKind());
4839 auto It
= ValueUseAccs
.find(SAI
);
4840 if (It
== ValueUseAccs
.end())
4845 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
4846 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4848 if (SAI
->isExitPHIKind())
4851 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
4852 return PHIReadAccs
.lookup(PHI
);
4855 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
4856 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4857 auto It
= PHIIncomingAccs
.find(SAI
);
4858 if (It
== PHIIncomingAccs
.end())
4863 bool Scop::isEscaping(Instruction
*Inst
) {
4864 assert(contains(Inst
) && "The concept of escaping makes only sense for "
4865 "values defined inside the SCoP");
4867 for (Use
&Use
: Inst
->uses()) {
4868 BasicBlock
*UserBB
= getUseBlock(Use
);
4869 if (!contains(UserBB
))
4872 // When the SCoP region exit needs to be simplified, PHIs in the region exit
4873 // move to a new basic block such that its incoming blocks are not in the
4875 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
4876 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
4882 Scop::ScopStatistics
Scop::getStatistics() const {
4883 ScopStatistics Result
;
4884 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
4885 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
4887 int NumTotalLoops
= LoopStat
.NumLoops
;
4888 Result
.NumBoxedLoops
= getBoxedLoops().size();
4889 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
4891 for (const ScopStmt
&Stmt
: *this) {
4892 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
4893 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
4894 for (MemoryAccess
*MA
: Stmt
) {
4898 if (MA
->isLatestValueKind()) {
4899 Result
.NumValueWrites
+= 1;
4901 Result
.NumValueWritesInLoops
+= 1;
4904 if (MA
->isLatestAnyPHIKind()) {
4905 Result
.NumPHIWrites
+= 1;
4907 Result
.NumPHIWritesInLoops
+= 1;
4911 MA
->getAccessRelation().intersect_domain(Domain
).range();
4912 if (AccSet
.is_singleton()) {
4913 Result
.NumSingletonWrites
+= 1;
4915 Result
.NumSingletonWritesInLoops
+= 1;
4923 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
4924 scop
.print(OS
, PollyPrintInstructions
);
4928 //===----------------------------------------------------------------------===//
4929 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
4930 AU
.addRequired
<LoopInfoWrapperPass
>();
4931 AU
.addRequired
<RegionInfoPass
>();
4932 AU
.addRequired
<DominatorTreeWrapperPass
>();
4933 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
4934 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
4935 AU
.addRequired
<AAResultsWrapperPass
>();
4936 AU
.addRequired
<AssumptionCacheTracker
>();
4937 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
4938 AU
.setPreservesAll();
4941 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
4942 Scop::ScopStatistics ScopStats
) {
4943 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
4946 NumLoopsInScop
+= Stats
.NumLoops
;
4948 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
4950 if (Stats
.MaxDepth
== 0)
4951 NumScopsDepthZero
++;
4952 else if (Stats
.MaxDepth
== 1)
4954 else if (Stats
.MaxDepth
== 2)
4956 else if (Stats
.MaxDepth
== 3)
4957 NumScopsDepthThree
++;
4958 else if (Stats
.MaxDepth
== 4)
4959 NumScopsDepthFour
++;
4960 else if (Stats
.MaxDepth
== 5)
4961 NumScopsDepthFive
++;
4963 NumScopsDepthLarger
++;
4965 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
4966 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
4968 NumValueWrites
+= ScopStats
.NumValueWrites
;
4969 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
4970 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
4971 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
4972 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
4973 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
4976 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
4977 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
4979 if (!SD
.isMaxRegionInScop(*R
))
4982 Function
*F
= R
->getEntry()->getParent();
4983 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
4984 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
4985 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
4986 auto const &DL
= F
->getParent()->getDataLayout();
4987 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
4988 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
4989 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
4991 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
4992 S
= SB
.getScop(); // take ownership of scop object
4994 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
4996 ScopDetection::LoopStats Stats
=
4997 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
4998 updateLoopCountStatistic(Stats
, S
->getStatistics());
5005 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5007 S
->print(OS
, PollyPrintInstructions
);
5009 OS
<< "Invalid Scop!\n";
// Pass identification: LLVM keys passes by the address of this member, not by
// its value.
char ScopInfoRegionPass::ID = 0;
// Factory used by the legacy pass manager to instantiate this pass.
Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5016 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5017 "Polly - Create polyhedral description of Scops", false,
5019 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5020 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5021 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5022 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5023 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5024 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5025 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5026 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5027 "Polly - Create polyhedral description of Scops", false,
5030 //===----------------------------------------------------------------------===//
5031 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5032 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5033 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5034 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5038 void ScopInfo::recompute() {
5039 RegionToScopMap
.clear();
5040 /// Create polyhedral description of scops for all the valid regions of a
5042 for (auto &It
: SD
) {
5043 Region
*R
= const_cast<Region
*>(It
);
5044 if (!SD
.isMaxRegionInScop(*R
))
5047 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5048 std::unique_ptr
<Scop
> S
= SB
.getScop();
5051 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5052 ScopDetection::LoopStats Stats
=
5053 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5054 updateLoopCountStatistic(Stats
, S
->getStatistics());
5056 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5057 assert(Inserted
&& "Building Scop for the same region twice!");
5062 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5063 FunctionAnalysisManager::Invalidator
&Inv
) {
5064 // Check whether the analysis, all analyses on functions have been preserved
5065 // or anything we're holding references to is being invalidated
5066 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5067 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5068 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5069 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5070 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5071 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5072 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5073 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
// Unique key identifying ScopInfoAnalysis in the new pass manager's registry.
AnalysisKey ScopInfoAnalysis::Key;
5078 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5079 FunctionAnalysisManager
&FAM
) {
5080 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5081 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5082 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5083 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5084 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5085 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5086 auto &DL
= F
.getParent()->getDataLayout();
5087 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5088 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5091 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5092 FunctionAnalysisManager
&FAM
) {
5093 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5094 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5095 // order here to keep the output persistent
5096 for (auto &It
: reverse(SI
)) {
5098 It
.second
->print(Stream
, PollyPrintInstructions
);
5100 Stream
<< "Invalid Scop!\n";
5102 return PreservedAnalyses::all();
5105 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5106 AU
.addRequired
<LoopInfoWrapperPass
>();
5107 AU
.addRequired
<RegionInfoPass
>();
5108 AU
.addRequired
<DominatorTreeWrapperPass
>();
5109 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5110 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5111 AU
.addRequired
<AAResultsWrapperPass
>();
5112 AU
.addRequired
<AssumptionCacheTracker
>();
5113 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5114 AU
.setPreservesAll();
5117 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5118 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5119 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5120 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5121 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5122 auto const &DL
= F
.getParent()->getDataLayout();
5123 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5124 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5125 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5127 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5131 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5132 for (auto &It
: *Result
) {
5134 It
.second
->print(OS
, PollyPrintInstructions
);
5136 OS
<< "Invalid Scop!\n";
// Pass identification: LLVM keys passes by the address of this member, not by
// its value.
char ScopInfoWrapperPass::ID = 0;
5142 Pass
*polly::createScopInfoWrapperPassPass() {
5143 return new ScopInfoWrapperPass();
5146 INITIALIZE_PASS_BEGIN(
5147 ScopInfoWrapperPass
, "polly-function-scops",
5148 "Polly - Create polyhedral description of all Scops of a function", false,
5150 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5151 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5152 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5153 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5154 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5155 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5156 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5157 INITIALIZE_PASS_END(
5158 ScopInfoWrapperPass
, "polly-function-scops",
5159 "Polly - Create polyhedral description of all Scops of a function", false,