1 //===- ScopInfo.cpp -------------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a static control flow region.
12 // The pass creates a polyhedral description of the Scops detected by the Scop
13 // detection derived from their LLVM-IR code.
15 // This representation is shared among several tools in the polyhedral
16 // community, which are e.g. Cloog, Pluto, Loopo, Graphite.
18 //===----------------------------------------------------------------------===//
20 #include "polly/ScopInfo.h"
21 #include "polly/LinkAllPasses.h"
22 #include "polly/Options.h"
23 #include "polly/ScopBuilder.h"
24 #include "polly/ScopDetection.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/ISLOStream.h"
27 #include "polly/Support/SCEVAffinator.h"
28 #include "polly/Support/SCEVValidator.h"
29 #include "polly/Support/ScopHelper.h"
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/PostOrderIterator.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SetVector.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 #include "llvm/ADT/StringExtras.h"
42 #include "llvm/ADT/StringMap.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/Loads.h"
47 #include "llvm/Analysis/LoopInfo.h"
48 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
49 #include "llvm/Analysis/RegionInfo.h"
50 #include "llvm/Analysis/RegionIterator.h"
51 #include "llvm/Analysis/ScalarEvolution.h"
52 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
53 #include "llvm/IR/Argument.h"
54 #include "llvm/IR/BasicBlock.h"
55 #include "llvm/IR/CFG.h"
56 #include "llvm/IR/ConstantRange.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/DebugLoc.h"
60 #include "llvm/IR/DerivedTypes.h"
61 #include "llvm/IR/DiagnosticInfo.h"
62 #include "llvm/IR/Dominators.h"
63 #include "llvm/IR/Function.h"
64 #include "llvm/IR/InstrTypes.h"
65 #include "llvm/IR/Instruction.h"
66 #include "llvm/IR/Instructions.h"
67 #include "llvm/IR/IntrinsicInst.h"
68 #include "llvm/IR/Module.h"
69 #include "llvm/IR/PassManager.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/Pass.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
79 #include "llvm/Support/ErrorHandling.h"
80 #include "llvm/Support/MathExtras.h"
81 #include "llvm/Support/raw_ostream.h"
83 #include "isl/constraint.h"
84 #include "isl/local_space.h"
86 #include "isl/options.h"
87 #include "isl/printer.h"
88 #include "isl/schedule.h"
89 #include "isl/schedule_node.h"
91 #include "isl/union_map.h"
92 #include "isl/union_set.h"
106 using namespace llvm
;
107 using namespace polly
;
109 #define DEBUG_TYPE "polly-scops"
111 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
112 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
113 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
114 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
115 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
116 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
117 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
118 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
119 STATISTIC(AssumptionsInvariantLoad
,
120 "Number of invariant loads assumptions taken.");
121 STATISTIC(AssumptionsDelinearization
,
122 "Number of delinearization assumptions taken.");
124 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
125 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
126 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
127 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
129 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
130 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
131 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
132 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
133 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
134 STATISTIC(NumScopsDepthLarger
,
135 "Number of scops with maximal loop depth 6 and larger");
136 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
138 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
140 NumValueWritesInLoops
,
141 "Number of scalar value writes nested in affine loops after ScopInfo");
142 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
143 STATISTIC(NumPHIWritesInLoops
,
144 "Number of scalar phi writes nested in affine loops after ScopInfo");
145 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
146 STATISTIC(NumSingletonWritesInLoops
,
147 "Number of singleton writes nested in affine loops after ScopInfo");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;
166 OptComputeOut("polly-analysis-computeout",
167 cl::desc("Bound the scop analysis by a maximal amount of "
168 "computational steps (0 means no bound)"),
169 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
170 cl::cat(PollyCategory
));
172 static cl::opt
<bool> PollyRemarksMinimal(
173 "polly-remarks-minimal",
174 cl::desc("Do not emit remarks about assumptions that are known"),
175 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
177 // Multiplicative reductions can be disabled separately as these kind of
178 // operations can overflow easily. Additive reductions and bit operations
179 // are in contrast pretty stable.
180 static cl::opt
<bool> DisableMultiplicativeReductions(
181 "polly-disable-multiplicative-reductions",
182 cl::desc("Disable multiplicative reductions"), cl::Hidden
, cl::ZeroOrMore
,
183 cl::init(false), cl::cat(PollyCategory
));
185 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
186 "polly-rtc-max-array-disjuncts",
187 cl::desc("The maximal number of disjunts allowed in memory accesses to "
189 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
191 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
192 "polly-rtc-max-parameters",
193 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
194 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
196 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
197 "polly-rtc-max-arrays-per-group",
198 cl::desc("The maximal number of arrays to compare in each alias group."),
199 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
201 static cl::opt
<std::string
> UserContextStr(
202 "polly-context", cl::value_desc("isl parameter set"),
203 cl::desc("Provide additional constraints on the context parameters"),
204 cl::init(""), cl::cat(PollyCategory
));
206 static cl::opt
<bool> DetectReductions("polly-detect-reductions",
207 cl::desc("Detect and exploit reductions"),
208 cl::Hidden
, cl::ZeroOrMore
,
209 cl::init(true), cl::cat(PollyCategory
));
212 IslOnErrorAbort("polly-on-isl-error-abort",
213 cl::desc("Abort if an isl error is encountered"),
214 cl::init(true), cl::cat(PollyCategory
));
216 static cl::opt
<bool> PollyPreciseInbounds(
217 "polly-precise-inbounds",
218 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
219 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
222 PollyIgnoreInbounds("polly-ignore-inbounds",
223 cl::desc("Do not take inbounds assumptions at all"),
224 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
226 static cl::opt
<bool> PollyIgnoreParamBounds(
227 "polly-ignore-parameter-bounds",
229 "Do not add parameter bounds and do no gist simplify sets accordingly"),
230 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
232 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
233 "polly-allow-dereference-of-all-function-parameters",
235 "Treat all parameters to functions that are pointers as dereferencible."
236 " This is useful for invariant load hoisting, since we can generate"
237 " less runtime checks. This is only valid if all pointers to functions"
238 " are always initialized, so that Polly can choose to hoist"
240 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
242 static cl::opt
<bool> PollyPreciseFoldAccesses(
243 "polly-precise-fold-accesses",
244 cl::desc("Fold memory accesses to model more possible delinearizations "
245 "(does not scale well)"),
246 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
248 bool polly::UseInstructionNames
;
250 static cl::opt
<bool, true> XUseInstructionNames(
251 "polly-use-llvm-names",
252 cl::desc("Use LLVM-IR names when deriving statement names"),
253 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
254 cl::ZeroOrMore
, cl::cat(PollyCategory
));
256 static cl::opt
<bool> PollyPrintInstructions(
257 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
258 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
260 //===----------------------------------------------------------------------===//
262 // Create a sequence of two schedules. Either argument may be null and is
263 // interpreted as the empty schedule. Can also return null if both schedules are
265 static __isl_give isl_schedule
*
266 combineInSequence(__isl_take isl_schedule
*Prev
,
267 __isl_take isl_schedule
*Succ
) {
273 return isl_schedule_sequence(Prev
, Succ
);
276 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
277 int dim
, isl::dim type
) {
279 isl::ctx Ctx
= S
.get_ctx();
281 // The upper and lower bound for a parameter value is derived either from
282 // the data type of the parameter or from the - possibly more restrictive -
284 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
285 S
= S
.lower_bound_val(type
, dim
, V
);
286 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
287 S
= S
.upper_bound_val(type
, dim
, V
);
289 if (Range
.isFullSet())
292 if (isl_set_n_basic_set(S
.get()) > MaxDisjunctsInContext
)
295 // In case of signed wrapping, we can refine the set of valid values by
296 // excluding the part not covered by the wrapping range.
297 if (Range
.isSignWrappedSet()) {
298 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
299 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
301 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
303 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
310 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
311 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
315 if (!S
->contains(BasePtrLI
))
318 ScalarEvolution
&SE
= *S
->getSE();
320 auto *OriginBaseSCEV
=
321 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
325 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
326 if (!OriginBaseSCEVUnknown
)
329 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
333 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
334 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
335 const DataLayout
&DL
, Scop
*S
,
336 const char *BaseName
)
337 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
338 std::string BasePtrName
=
340 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
341 Kind
== MemoryKind::PHI
? "__phi" : "",
342 UseInstructionNames
);
343 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
347 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
348 BasePtrOriginSAI
= nullptr;
352 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
353 if (BasePtrOriginSAI
)
354 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
357 ScopArrayInfo::~ScopArrayInfo() = default;
359 isl::space
ScopArrayInfo::getSpace() const {
360 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
361 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
365 bool ScopArrayInfo::isReadOnly() {
366 isl::union_set WriteSet
= S
.getWrites().range();
367 isl::space Space
= getSpace();
368 WriteSet
= WriteSet
.extract_set(Space
);
370 return bool(WriteSet
.is_empty());
373 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
374 if (Array
->getElementType() != getElementType())
377 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
380 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
381 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
387 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
388 if (NewElementType
== ElementType
)
391 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
392 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
394 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
397 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
398 ElementType
= NewElementType
;
400 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
401 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
405 /// Make the ScopArrayInfo model a Fortran Array
406 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
407 assert(FAD
&& "got invalid Fortran array descriptor");
409 assert(this->FAD
== FAD
&&
410 "receiving different array descriptors for same array");
414 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
418 isl::space
Space(S
.getIslCtx(), 1, 0);
420 std::string param_name
= getName();
421 param_name
+= "_fortranarr_size";
422 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
424 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
426 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
428 DimensionSizesPw
[0] = PwAff
;
431 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
432 bool CheckConsistency
) {
433 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
434 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
435 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
437 if (CheckConsistency
) {
438 for (int i
= 0; i
< SharedDims
; i
++) {
439 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
440 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
441 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
445 if (DimensionSizes
.size() >= NewSizes
.size())
449 DimensionSizes
.clear();
450 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
452 DimensionSizesPw
.clear();
453 for (const SCEV
*Expr
: DimensionSizes
) {
455 DimensionSizesPw
.push_back(nullptr);
458 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
459 DimensionSizesPw
.push_back(Size
);
464 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
466 int ScopArrayInfo::getElemSizeInBytes() const {
467 return DL
.getTypeAllocSize(ElementType
);
470 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
473 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
476 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
477 OS
.indent(8) << *getElementType() << " " << getName();
479 // If this is a Fortran array, then we can print the outermost dimension
480 // as a isl_pw_aff even though there is no SCEV information.
481 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
483 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
484 !getDimensionSize(0)) {
488 for (; u
< getNumberOfDimensions(); u
++) {
492 isl::pw_aff Size
= getDimensionSizePw(u
);
493 OS
<< " " << Size
<< " ";
495 OS
<< *getDimensionSize(u
);
503 if (BasePtrOriginSAI
)
504 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
506 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
509 const ScopArrayInfo
*
510 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
511 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
512 assert(!Id
.is_null() && "Output dimension didn't have an ID");
513 return getFromId(Id
);
516 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
517 void *User
= Id
.get_user();
518 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
522 void MemoryAccess::wrapConstantDimensions() {
523 auto *SAI
= getScopArrayInfo();
524 isl::space ArraySpace
= SAI
->getSpace();
525 isl::ctx Ctx
= ArraySpace
.get_ctx();
526 unsigned DimsArray
= SAI
->getNumberOfDimensions();
528 isl::multi_aff DivModAff
= isl::multi_aff::identity(
529 ArraySpace
.map_from_domain_and_range(ArraySpace
));
530 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
532 // Begin with last dimension, to iteratively carry into higher dimensions.
533 for (int i
= DimsArray
- 1; i
> 0; i
--) {
534 auto *DimSize
= SAI
->getDimensionSize(i
);
535 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
537 // This transformation is not applicable to dimensions with dynamic size.
541 // This transformation is not applicable to dimensions of size zero.
542 if (DimSize
->isZero())
545 isl::val DimSizeVal
=
546 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
547 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
549 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
551 // Compute: index % size
552 // Modulo must apply in the divide of the previous iteration, if any.
553 isl::aff Modulo
= Var
.mod(DimSizeVal
);
554 Modulo
= Modulo
.pullback(DivModAff
);
556 // Compute: floor(index / size)
557 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
558 Divide
= Divide
.floor();
559 Divide
= Divide
.add(PrevVar
);
560 Divide
= Divide
.pullback(DivModAff
);
562 // Apply Modulo and Divide.
563 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
564 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
567 // Apply all modulo/divides on the accesses.
568 isl::map Relation
= AccessRelation
;
569 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
570 Relation
= Relation
.detect_equalities();
571 AccessRelation
= Relation
;
574 void MemoryAccess::updateDimensionality() {
575 auto *SAI
= getScopArrayInfo();
576 isl::space ArraySpace
= SAI
->getSpace();
577 isl::space AccessSpace
= AccessRelation
.get_space().range();
578 isl::ctx Ctx
= ArraySpace
.get_ctx();
580 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
581 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
582 auto DimsMissing
= DimsArray
- DimsAccess
;
584 auto *BB
= getStatement()->getEntryBlock();
585 auto &DL
= BB
->getModule()->getDataLayout();
586 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
587 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
589 isl::map Map
= isl::map::from_domain_and_range(
590 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
592 for (unsigned i
= 0; i
< DimsMissing
; i
++)
593 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
595 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
596 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
598 AccessRelation
= AccessRelation
.apply_range(Map
);
600 // For the non delinearized arrays, divide the access function of the last
601 // subscript by the size of the elements in the array.
603 // A stride one array access in C expressed as A[i] is expressed in
604 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
605 // two subsequent values of 'i' index two values that are stored next to
606 // each other in memory. By this division we make this characteristic
607 // obvious again. If the base pointer was accessed with offsets not divisible
608 // by the accesses element size, we will have chosen a smaller ArrayElemSize
609 // that divides the offsets of all accesses to this base pointer.
610 if (DimsAccess
== 1) {
611 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
612 AccessRelation
= AccessRelation
.floordiv_val(V
);
615 // We currently do this only if we added at least one dimension, which means
616 // some dimension's indices have not been specified, an indicator that some
617 // index values have been added together.
618 // TODO: Investigate general usefulness; Effect on unit tests is to make index
619 // expressions more complicated.
621 wrapConstantDimensions();
624 computeBoundsOnAccessRelation(ArrayElemSize
);
626 // Introduce multi-element accesses in case the type loaded by this memory
627 // access is larger than the canonical element type of the array.
629 // An access ((float *)A)[i] to an array char *A is modeled as
630 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
631 if (ElemBytes
> ArrayElemSize
) {
632 assert(ElemBytes
% ArrayElemSize
== 0 &&
633 "Loaded element size should be multiple of canonical element size");
634 isl::map Map
= isl::map::from_domain_and_range(
635 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
636 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
637 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
642 LS
= isl::local_space(Map
.get_space());
643 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
645 C
= isl::constraint::alloc_inequality(LS
);
646 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
647 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
648 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
649 Map
= Map
.add_constraint(C
);
651 C
= isl::constraint::alloc_inequality(LS
);
652 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
653 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
654 C
= C
.set_constant_val(isl::val(Ctx
, 0));
655 Map
= Map
.add_constraint(C
);
656 AccessRelation
= AccessRelation
.apply_range(Map
);
661 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
663 case MemoryAccess::RT_NONE
:
664 llvm_unreachable("Requested a reduction operator string for a memory "
665 "access which isn't a reduction");
666 case MemoryAccess::RT_ADD
:
668 case MemoryAccess::RT_MUL
:
670 case MemoryAccess::RT_BOR
:
672 case MemoryAccess::RT_BXOR
:
674 case MemoryAccess::RT_BAND
:
677 llvm_unreachable("Unknown reduction type");
680 /// Return the reduction type for a given binary operator.
681 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
682 const Instruction
*Load
) {
684 return MemoryAccess::RT_NONE
;
685 switch (BinOp
->getOpcode()) {
686 case Instruction::FAdd
:
687 if (!BinOp
->hasUnsafeAlgebra())
688 return MemoryAccess::RT_NONE
;
690 case Instruction::Add
:
691 return MemoryAccess::RT_ADD
;
692 case Instruction::Or
:
693 return MemoryAccess::RT_BOR
;
694 case Instruction::Xor
:
695 return MemoryAccess::RT_BXOR
;
696 case Instruction::And
:
697 return MemoryAccess::RT_BAND
;
698 case Instruction::FMul
:
699 if (!BinOp
->hasUnsafeAlgebra())
700 return MemoryAccess::RT_NONE
;
702 case Instruction::Mul
:
703 if (DisableMultiplicativeReductions
)
704 return MemoryAccess::RT_NONE
;
705 return MemoryAccess::RT_MUL
;
707 return MemoryAccess::RT_NONE
;
711 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
712 isl::id ArrayId
= getArrayId();
713 void *User
= ArrayId
.get_user();
714 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
718 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
719 isl::id ArrayId
= getLatestArrayId();
720 void *User
= ArrayId
.get_user();
721 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
725 isl::id
MemoryAccess::getOriginalArrayId() const {
726 return AccessRelation
.get_tuple_id(isl::dim::out
);
729 isl::id
MemoryAccess::getLatestArrayId() const {
730 if (!hasNewAccessRelation())
731 return getOriginalArrayId();
732 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
735 isl::map
MemoryAccess::getAddressFunction() const {
736 return getAccessRelation().lexmin();
740 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
741 isl::map Schedule
, ScheduledAccRel
;
742 isl::union_set UDomain
;
744 UDomain
= getStatement()->getDomain();
745 USchedule
= USchedule
.intersect_domain(UDomain
);
746 Schedule
= isl::map::from_union_map(USchedule
);
747 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
748 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
751 isl::map
MemoryAccess::getOriginalAccessRelation() const {
752 return AccessRelation
;
755 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
756 return stringFromIslObj(AccessRelation
.get());
759 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
760 return AccessRelation
.get_space();
763 isl::map
MemoryAccess::getNewAccessRelation() const {
764 return NewAccessRelation
;
767 std::string
MemoryAccess::getNewAccessRelationStr() const {
768 return stringFromIslObj(NewAccessRelation
.get());
771 std::string
MemoryAccess::getAccessRelationStr() const {
772 return getAccessRelation().to_str();
775 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
776 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
777 Space
= Space
.align_params(Statement
->getDomainSpace());
779 return isl::basic_map::from_domain_and_range(
780 isl::basic_set::universe(Statement
->getDomainSpace()),
781 isl::basic_set::universe(Space
));
784 // Formalize no out-of-bound access assumption
786 // When delinearizing array accesses we optimistically assume that the
787 // delinearized accesses do not access out of bound locations (the subscript
788 // expression of each array evaluates for each statement instance that is
789 // executed to a value that is larger than zero and strictly smaller than the
790 // size of the corresponding dimension). The only exception is the outermost
791 // dimension for which we do not need to assume any upper bound. At this point
792 // we formalize this assumption to ensure that at code generation time the
793 // relevant run-time checks can be generated.
795 // To find the set of constraints necessary to avoid out of bound accesses, we
796 // first build the set of data locations that are not within array bounds. We
797 // then apply the reverse access relation to obtain the set of iterations that
798 // may contain invalid accesses and reduce this set of iterations to the ones
799 // that are actually executed by intersecting them with the domain of the
800 // statement. If we now project out all loop dimensions, we obtain a set of
801 // parameters that may cause statement instances to be executed that may
802 // possibly yield out of bound memory accesses. The complement of these
803 // constraints is the set of constraints that needs to be assumed to ensure such
804 // statement instances are never executed.
805 void MemoryAccess::assumeNoOutOfBound() {
806 if (PollyIgnoreInbounds
)
808 auto *SAI
= getScopArrayInfo();
809 isl::space Space
= getOriginalAccessRelationSpace().range();
810 isl::set Outside
= isl::set::empty(Space
);
811 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
812 isl::local_space
LS(Space
);
813 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
814 isl::pw_aff Zero
= isl::pw_aff(LS
);
816 isl::set DimOutside
= Var
.lt_set(Zero
);
817 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
818 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
819 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
820 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
822 Outside
= Outside
.unite(DimOutside
);
825 Outside
= Outside
.apply(getAccessRelation().reverse());
826 Outside
= Outside
.intersect(Statement
->getDomain());
827 Outside
= Outside
.params();
829 // Remove divs to avoid the construction of overly complicated assumptions.
830 // Doing so increases the set of parameter combinations that are assumed to
831 // not appear. This is always save, but may make the resulting run-time check
832 // bail out more often than strictly necessary.
833 Outside
= Outside
.remove_divs();
834 Outside
= Outside
.complement();
835 const auto &Loc
= getAccessInstruction()
836 ? getAccessInstruction()->getDebugLoc()
838 if (!PollyPreciseInbounds
)
839 Outside
= Outside
.gist_params(Statement
->getDomain().params());
840 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
.release(), Loc
,
844 void MemoryAccess::buildMemIntrinsicAccessRelation() {
845 assert(isMemoryIntrinsic());
846 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
848 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
849 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
852 if (Subscripts
[1] == nullptr) {
853 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
855 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
856 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
857 isl::space RangeSpace
= LengthMap
.get_space().range();
858 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
860 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
861 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
862 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
863 LengthMap
= LengthMap
.sum(SubscriptMap
);
865 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
868 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
869 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
871 auto MAI
= MemAccInst(getAccessInstruction());
872 if (isa
<MemIntrinsic
>(MAI
))
875 Value
*Ptr
= MAI
.getPointerOperand();
876 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
879 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
880 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
883 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
884 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
885 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
887 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
888 if (Range
.isFullSet())
891 if (Range
.isWrappedSet() || Range
.isSignWrappedSet())
894 bool isWrapping
= Range
.isSignWrappedSet();
896 unsigned BW
= Range
.getBitWidth();
897 const auto One
= APInt(BW
, 1);
898 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
899 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
901 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
902 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
904 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
906 isl::map Relation
= AccessRelation
;
907 isl::set AccessRange
= Relation
.range();
908 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
910 AccessRelation
= Relation
.intersect_range(AccessRange
);
913 void MemoryAccess::foldAccessRelation() {
914 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
917 int Size
= Subscripts
.size();
919 isl::map NewAccessRelation
= AccessRelation
;
921 for (int i
= Size
- 2; i
>= 0; --i
) {
923 isl::map MapOne
, MapTwo
;
924 isl::pw_aff DimSize
= getPwAff(Sizes
[i
+ 1]);
926 isl::space SpaceSize
= DimSize
.get_space();
928 give(isl_space_get_dim_id(SpaceSize
.get(), isl_dim_param
, 0));
930 Space
= AccessRelation
.get_space();
931 Space
= Space
.range().map_from_set();
932 Space
= Space
.align_params(SpaceSize
);
934 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
936 MapOne
= isl::map::universe(Space
);
937 for (int j
= 0; j
< Size
; ++j
)
938 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
939 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
941 MapTwo
= isl::map::universe(Space
);
942 for (int j
= 0; j
< Size
; ++j
)
943 if (j
< i
|| j
> i
+ 1)
944 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
946 isl::local_space
LS(Space
);
948 C
= isl::constraint::alloc_equality(LS
);
949 C
= C
.set_constant_si(-1);
950 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
951 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
952 MapTwo
= MapTwo
.add_constraint(C
);
953 C
= isl::constraint::alloc_equality(LS
);
954 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
955 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
956 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
957 MapTwo
= MapTwo
.add_constraint(C
);
958 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
960 MapOne
= MapOne
.unite(MapTwo
);
961 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
964 isl::id BaseAddrId
= getScopArrayInfo()->getBasePtrId();
965 isl::space Space
= Statement
->getDomainSpace();
966 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
967 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
968 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
969 NewAccessRelation
= NewAccessRelation
.gist_domain(Statement
->getDomain());
971 // Access dimension folding might in certain cases increase the number of
972 // disjuncts in the memory access, which can possibly complicate the generated
973 // run-time checks and can lead to costly compilation.
974 if (!PollyPreciseFoldAccesses
&&
975 isl_map_n_basic_map(NewAccessRelation
.get()) >
976 isl_map_n_basic_map(AccessRelation
.get())) {
978 AccessRelation
= NewAccessRelation
;
982 /// Check if @p Expr is divisible by @p Size.
983 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
988 // Only one factor needs to be divisible.
989 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
990 for (auto *FactorExpr
: MulExpr
->operands())
991 if (isDivisible(FactorExpr
, Size
, SE
))
996 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
998 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
999 for (auto *OpExpr
: NAryExpr
->operands())
1000 if (!isDivisible(OpExpr
, Size
, SE
))
1005 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
1006 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
1007 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
1008 return MulSCEV
== Expr
;
1011 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
1012 assert(AccessRelation
.is_null() && "AccessRelation already built");
1014 // Initialize the invalid domain which describes all iterations for which the
1015 // access relation is not modeled correctly.
1016 isl::set StmtInvalidDomain
= getStatement()->getInvalidDomain();
1017 InvalidDomain
= isl::set::empty(StmtInvalidDomain
.get_space());
1019 isl::ctx Ctx
= Id
.get_ctx();
1020 isl::id BaseAddrId
= SAI
->getBasePtrId();
1022 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
1023 buildMemIntrinsicAccessRelation();
1024 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1029 // We overapproximate non-affine accesses with a possible access to the
1030 // whole array. For read accesses it does not make a difference, if an
1031 // access must or may happen. However, for write accesses it is important to
1032 // differentiate between writes that must happen and writes that may happen.
1033 if (AccessRelation
.is_null())
1034 AccessRelation
= createBasicAccessMap(Statement
);
1036 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1040 isl::space Space
= isl::space(Ctx
, 0, Statement
->getNumIterators(), 0);
1041 AccessRelation
= isl::map::universe(Space
);
1043 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
1044 isl::pw_aff Affine
= getPwAff(Subscripts
[i
]);
1045 isl::map SubscriptMap
= isl::map::from_pw_aff(Affine
);
1046 AccessRelation
= AccessRelation
.flat_range_product(SubscriptMap
);
1049 Space
= Statement
->getDomainSpace();
1050 AccessRelation
= AccessRelation
.set_tuple_id(
1051 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
1052 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1054 AccessRelation
= AccessRelation
.gist_domain(Statement
->getDomain());
1057 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1058 AccessType AccType
, Value
*BaseAddress
,
1059 Type
*ElementType
, bool Affine
,
1060 ArrayRef
<const SCEV
*> Subscripts
,
1061 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1063 : Kind(Kind
), AccType(AccType
), Statement(Stmt
), InvalidDomain(nullptr),
1064 BaseAddr(BaseAddress
), ElementType(ElementType
),
1065 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1066 AccessValue(AccessValue
), IsAffine(Affine
),
1067 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1068 NewAccessRelation(nullptr), FAD(nullptr) {
1069 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1070 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1072 std::string IdName
= Stmt
->getBaseName() + Access
;
1073 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1076 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
, isl::map AccRel
)
1077 : Kind(MemoryKind::Array
), AccType(AccType
), Statement(Stmt
),
1078 InvalidDomain(nullptr), AccessRelation(nullptr),
1079 NewAccessRelation(AccRel
), FAD(nullptr) {
1080 isl::id ArrayInfoId
= NewAccessRelation
.get_tuple_id(isl::dim::out
);
1081 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1082 Sizes
.push_back(nullptr);
1083 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1084 Sizes
.push_back(SAI
->getDimensionSize(i
));
1085 ElementType
= SAI
->getElementType();
1086 BaseAddr
= SAI
->getBasePtr();
1087 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1088 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1090 std::string IdName
= Stmt
->getBaseName() + Access
;
1091 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1094 MemoryAccess::~MemoryAccess() = default;
1096 void MemoryAccess::realignParams() {
1097 isl::set Ctx
= Statement
->getParent()->getContext();
1098 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1099 AccessRelation
= AccessRelation
.gist_params(Ctx
);
1102 const std::string
MemoryAccess::getReductionOperatorStr() const {
1103 return MemoryAccess::getReductionOperatorStr(getReductionType());
1106 isl::id
MemoryAccess::getId() const { return Id
; }
1108 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1109 MemoryAccess::ReductionType RT
) {
1110 if (RT
== MemoryAccess::RT_NONE
)
1113 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1117 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1119 void MemoryAccess::print(raw_ostream
&OS
) const {
1122 OS
.indent(12) << "ReadAccess :=\t";
1125 OS
.indent(12) << "MustWriteAccess :=\t";
1128 OS
.indent(12) << "MayWriteAccess :=\t";
1132 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1135 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1139 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1140 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1141 if (hasNewAccessRelation())
1142 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1145 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1146 LLVM_DUMP_METHOD
void MemoryAccess::dump() const { print(errs()); }
1149 isl::pw_aff
MemoryAccess::getPwAff(const SCEV
*E
) {
1150 auto *Stmt
= getStatement();
1151 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1152 isl::set StmtDom
= getStatement()->getDomain();
1153 StmtDom
= StmtDom
.reset_tuple_id();
1154 isl::set NewInvalidDom
= StmtDom
.intersect(isl::manage(PWAC
.second
));
1155 InvalidDomain
= InvalidDomain
.unite(NewInvalidDom
);
1156 return isl::manage(PWAC
.first
);
1159 // Create a map in the size of the provided set domain, that maps from the
1160 // one element of the provided set domain to another element of the provided
1162 // The mapping is limited to all points that are equal in all but the last
1163 // dimension and for which the last dimension of the input is strict smaller
1164 // than the last dimension of the output.
1166 // getEqualAndLarger(set[i0, i1, ..., iX]):
1168 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1169 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1171 static isl::map
getEqualAndLarger(isl::space SetDomain
) {
1172 isl::space Space
= SetDomain
.map_from_set();
1173 isl::map Map
= isl::map::universe(Space
);
1174 unsigned lastDimension
= Map
.dim(isl::dim::in
) - 1;
1176 // Set all but the last dimension to be equal for the input and output
1178 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1179 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1180 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1181 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
1183 // Set the last dimension of the input to be strict smaller than the
1184 // last dimension of the output.
1186 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1187 Map
= Map
.order_lt(isl::dim::in
, lastDimension
, isl::dim::out
, lastDimension
);
1191 isl::set
MemoryAccess::getStride(isl::map Schedule
) const {
1192 isl::map AccessRelation
= getAccessRelation();
1193 isl::space Space
= Schedule
.get_space().range();
1194 isl::map NextScatt
= getEqualAndLarger(Space
);
1196 Schedule
= Schedule
.reverse();
1197 NextScatt
= NextScatt
.lexmin();
1199 NextScatt
= NextScatt
.apply_range(Schedule
);
1200 NextScatt
= NextScatt
.apply_range(AccessRelation
);
1201 NextScatt
= NextScatt
.apply_domain(Schedule
);
1202 NextScatt
= NextScatt
.apply_domain(AccessRelation
);
1204 isl::set Deltas
= NextScatt
.deltas();
1208 bool MemoryAccess::isStrideX(isl::map Schedule
, int StrideWidth
) const {
1209 isl::set Stride
, StrideX
;
1212 Stride
= getStride(Schedule
);
1213 StrideX
= isl::set::universe(Stride
.get_space());
1214 for (unsigned i
= 0; i
< StrideX
.dim(isl::dim::set
) - 1; i
++)
1215 StrideX
= StrideX
.fix_si(isl::dim::set
, i
, 0);
1216 StrideX
= StrideX
.fix_si(isl::dim::set
, StrideX
.dim(isl::dim::set
) - 1,
1218 IsStrideX
= Stride
.is_subset(StrideX
);
1223 bool MemoryAccess::isStrideZero(isl::map Schedule
) const {
1224 return isStrideX(Schedule
, 0);
1227 bool MemoryAccess::isStrideOne(isl::map Schedule
) const {
1228 return isStrideX(Schedule
, 1);
1231 void MemoryAccess::setAccessRelation(isl::map NewAccess
) {
1232 AccessRelation
= NewAccess
;
1235 void MemoryAccess::setNewAccessRelation(isl::map NewAccess
) {
1239 // Check domain space compatibility.
1240 isl::space NewSpace
= NewAccess
.get_space();
1241 isl::space NewDomainSpace
= NewSpace
.domain();
1242 isl::space OriginalDomainSpace
= getStatement()->getDomainSpace();
1243 assert(OriginalDomainSpace
.has_equal_tuples(NewDomainSpace
));
1245 // Reads must be executed unconditionally. Writes might be executed in a
1248 // Check whether there is an access for every statement instance.
1249 isl::set StmtDomain
= getStatement()->getDomain();
1251 StmtDomain
.intersect_params(getStatement()->getParent()->getContext());
1252 isl::set NewDomain
= NewAccess
.domain();
1253 assert(StmtDomain
.is_subset(NewDomain
) &&
1254 "Partial READ accesses not supported");
1257 isl::space NewAccessSpace
= NewAccess
.get_space();
1258 assert(NewAccessSpace
.has_tuple_id(isl::dim::set
) &&
1259 "Must specify the array that is accessed");
1260 isl::id NewArrayId
= NewAccessSpace
.get_tuple_id(isl::dim::set
);
1261 auto *SAI
= static_cast<ScopArrayInfo
*>(NewArrayId
.get_user());
1262 assert(SAI
&& "Must set a ScopArrayInfo");
1264 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1265 InvariantEquivClassTy
*EqClass
=
1266 getStatement()->getParent()->lookupInvariantEquivClass(
1269 "Access functions to indirect arrays must have an invariant and "
1270 "hoisted base pointer");
1273 // Check whether access dimensions correspond to number of dimensions of the
1275 auto Dims
= SAI
->getNumberOfDimensions();
1276 assert(NewAccessSpace
.dim(isl::dim::set
) == Dims
&&
1277 "Access dims must match array dims");
1280 NewAccess
= NewAccess
.gist_domain(getStatement()->getDomain());
1281 NewAccessRelation
= NewAccess
;
1284 bool MemoryAccess::isLatestPartialAccess() const {
1285 isl::set StmtDom
= getStatement()->getDomain();
1286 isl::set AccDom
= getLatestAccessRelation().domain();
1288 return isl_set_is_subset(StmtDom
.keep(), AccDom
.keep()) == isl_bool_false
;
1291 //===----------------------------------------------------------------------===//
1293 isl::map
ScopStmt::getSchedule() const {
1294 isl::set Domain
= getDomain();
1295 if (Domain
.is_empty())
1296 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1297 auto Schedule
= getParent()->getSchedule();
1300 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1301 if (Schedule
.is_empty())
1302 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1303 isl::map M
= M
.from_union_map(Schedule
);
1305 M
= M
.gist_domain(Domain
);
1310 void ScopStmt::restrictDomain(isl::set NewDomain
) {
1311 assert(NewDomain
.is_subset(Domain
) &&
1312 "New domain is not a subset of old domain!");
1316 void ScopStmt::buildAccessRelations() {
1317 Scop
&S
= *getParent();
1318 for (MemoryAccess
*Access
: MemAccs
) {
1319 Type
*ElementType
= Access
->getElementType();
1322 if (Access
->isPHIKind())
1323 Ty
= MemoryKind::PHI
;
1324 else if (Access
->isExitPHIKind())
1325 Ty
= MemoryKind::ExitPHI
;
1326 else if (Access
->isValueKind())
1327 Ty
= MemoryKind::Value
;
1329 Ty
= MemoryKind::Array
;
1331 auto *SAI
= S
.getOrCreateScopArrayInfo(Access
->getOriginalBaseAddr(),
1332 ElementType
, Access
->Sizes
, Ty
);
1333 Access
->buildAccessRelation(SAI
);
1334 S
.addAccessData(Access
);
1338 void ScopStmt::addAccess(MemoryAccess
*Access
, bool Prepend
) {
1339 Instruction
*AccessInst
= Access
->getAccessInstruction();
1341 if (Access
->isArrayKind()) {
1342 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1343 MAL
.emplace_front(Access
);
1344 } else if (Access
->isValueKind() && Access
->isWrite()) {
1345 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1346 assert(!ValueWrites
.lookup(AccessVal
));
1348 ValueWrites
[AccessVal
] = Access
;
1349 } else if (Access
->isValueKind() && Access
->isRead()) {
1350 Value
*AccessVal
= Access
->getAccessValue();
1351 assert(!ValueReads
.lookup(AccessVal
));
1353 ValueReads
[AccessVal
] = Access
;
1354 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1355 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1356 assert(!PHIWrites
.lookup(PHI
));
1358 PHIWrites
[PHI
] = Access
;
1359 } else if (Access
->isAnyPHIKind() && Access
->isRead()) {
1360 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1361 assert(!PHIReads
.lookup(PHI
));
1363 PHIReads
[PHI
] = Access
;
1367 MemAccs
.insert(MemAccs
.begin(), Access
);
1370 MemAccs
.push_back(Access
);
1373 void ScopStmt::realignParams() {
1374 for (MemoryAccess
*MA
: *this)
1375 MA
->realignParams();
1377 isl::set Ctx
= Parent
.getContext();
1378 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1379 Domain
= Domain
.gist_params(Ctx
);
1382 /// Add @p BSet to the set @p User if @p BSet is bounded.
1383 static isl_stat
collectBoundedParts(__isl_take isl_basic_set
*BSet
,
1385 isl_set
**BoundedParts
= static_cast<isl_set
**>(User
);
1386 if (isl_basic_set_is_bounded(BSet
))
1387 *BoundedParts
= isl_set_union(*BoundedParts
, isl_set_from_basic_set(BSet
));
1389 isl_basic_set_free(BSet
);
1393 /// Return the bounded parts of @p S.
1394 static __isl_give isl_set
*collectBoundedParts(__isl_take isl_set
*S
) {
1395 isl_set
*BoundedParts
= isl_set_empty(isl_set_get_space(S
));
1396 isl_set_foreach_basic_set(S
, collectBoundedParts
, &BoundedParts
);
1398 return BoundedParts
;
1401 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1403 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1404 /// both with regards to the dimension @p Dim.
1405 static std::pair
<__isl_give isl_set
*, __isl_give isl_set
*>
1406 partitionSetParts(__isl_take isl_set
*S
, unsigned Dim
) {
1407 for (unsigned u
= 0, e
= isl_set_n_dim(S
); u
< e
; u
++)
1408 S
= isl_set_lower_bound_si(S
, isl_dim_set
, u
, 0);
1410 unsigned NumDimsS
= isl_set_n_dim(S
);
1411 isl_set
*OnlyDimS
= isl_set_copy(S
);
1413 // Remove dimensions that are greater than Dim as they are not interesting.
1414 assert(NumDimsS
>= Dim
+ 1);
1416 isl_set_project_out(OnlyDimS
, isl_dim_set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1418 // Create artificial parametric upper bounds for dimensions smaller than Dim
1419 // as we are not interested in them.
1420 OnlyDimS
= isl_set_insert_dims(OnlyDimS
, isl_dim_param
, 0, Dim
);
1421 for (unsigned u
= 0; u
< Dim
; u
++) {
1422 isl_constraint
*C
= isl_inequality_alloc(
1423 isl_local_space_from_space(isl_set_get_space(OnlyDimS
)));
1424 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, u
, 1);
1425 C
= isl_constraint_set_coefficient_si(C
, isl_dim_set
, u
, -1);
1426 OnlyDimS
= isl_set_add_constraint(OnlyDimS
, C
);
1429 // Collect all bounded parts of OnlyDimS.
1430 isl_set
*BoundedParts
= collectBoundedParts(OnlyDimS
);
1432 // Create the dimensions greater than Dim again.
1433 BoundedParts
= isl_set_insert_dims(BoundedParts
, isl_dim_set
, Dim
+ 1,
1434 NumDimsS
- Dim
- 1);
1436 // Remove the artificial upper bound parameters again.
1437 BoundedParts
= isl_set_remove_dims(BoundedParts
, isl_dim_param
, 0, Dim
);
1439 isl_set
*UnboundedParts
= isl_set_subtract(S
, isl_set_copy(BoundedParts
));
1440 return std::make_pair(UnboundedParts
, BoundedParts
);
1443 /// Set the dimension Ids from @p From in @p To.
1444 static __isl_give isl_set
*setDimensionIds(__isl_keep isl_set
*From
,
1445 __isl_take isl_set
*To
) {
1446 for (unsigned u
= 0, e
= isl_set_n_dim(From
); u
< e
; u
++) {
1447 isl_id
*DimId
= isl_set_get_dim_id(From
, isl_dim_set
, u
);
1448 To
= isl_set_set_dim_id(To
, isl_dim_set
, u
, DimId
);
1453 /// Create the conditions under which @p L @p Pred @p R is true.
1454 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1455 __isl_take isl_pw_aff
*L
,
1456 __isl_take isl_pw_aff
*R
) {
1458 case ICmpInst::ICMP_EQ
:
1459 return isl_pw_aff_eq_set(L
, R
);
1460 case ICmpInst::ICMP_NE
:
1461 return isl_pw_aff_ne_set(L
, R
);
1462 case ICmpInst::ICMP_SLT
:
1463 return isl_pw_aff_lt_set(L
, R
);
1464 case ICmpInst::ICMP_SLE
:
1465 return isl_pw_aff_le_set(L
, R
);
1466 case ICmpInst::ICMP_SGT
:
1467 return isl_pw_aff_gt_set(L
, R
);
1468 case ICmpInst::ICMP_SGE
:
1469 return isl_pw_aff_ge_set(L
, R
);
1470 case ICmpInst::ICMP_ULT
:
1471 return isl_pw_aff_lt_set(L
, R
);
1472 case ICmpInst::ICMP_UGT
:
1473 return isl_pw_aff_gt_set(L
, R
);
1474 case ICmpInst::ICMP_ULE
:
1475 return isl_pw_aff_le_set(L
, R
);
1476 case ICmpInst::ICMP_UGE
:
1477 return isl_pw_aff_ge_set(L
, R
);
1479 llvm_unreachable("Non integer predicate not supported");
1483 /// Create the conditions under which @p L @p Pred @p R is true.
1485 /// Helper function that will make sure the dimensions of the result have the
1486 /// same isl_id's as the @p Domain.
1487 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1488 __isl_take isl_pw_aff
*L
,
1489 __isl_take isl_pw_aff
*R
,
1490 __isl_keep isl_set
*Domain
) {
1491 isl_set
*ConsequenceCondSet
= buildConditionSet(Pred
, L
, R
);
1492 return setDimensionIds(Domain
, ConsequenceCondSet
);
1495 /// Compute the isl representation for the SCEV @p E in this BB.
1497 /// @param S The Scop in which @p BB resides in.
1498 /// @param BB The BB for which isl representation is to be
1500 /// @param InvalidDomainMap A map of BB to their invalid domains.
1501 /// @param E The SCEV that should be translated.
1502 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
1504 /// Note that this function will also adjust the invalid context accordingly.
1506 __isl_give isl_pw_aff
*
1507 getPwAff(Scop
&S
, BasicBlock
*BB
,
1508 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
, const SCEV
*E
,
1509 bool NonNegative
= false) {
1510 PWACtx PWAC
= S
.getPwAff(E
, BB
, NonNegative
);
1511 InvalidDomainMap
[BB
] = InvalidDomainMap
[BB
].unite(isl::manage(PWAC
.second
));
1515 /// Build the conditions sets for the switch @p SI in the @p Domain.
1517 /// This will fill @p ConditionSets with the conditions under which control
1518 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1519 /// have as many elements as @p SI has successors.
1521 buildConditionSets(Scop
&S
, BasicBlock
*BB
, SwitchInst
*SI
, Loop
*L
,
1522 __isl_keep isl_set
*Domain
,
1523 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1524 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1525 Value
*Condition
= getConditionFromTerminator(SI
);
1526 assert(Condition
&& "No condition for switch");
1528 ScalarEvolution
&SE
= *S
.getSE();
1529 isl_pw_aff
*LHS
, *RHS
;
1530 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEVAtScope(Condition
, L
));
1532 unsigned NumSuccessors
= SI
->getNumSuccessors();
1533 ConditionSets
.resize(NumSuccessors
);
1534 for (auto &Case
: SI
->cases()) {
1535 unsigned Idx
= Case
.getSuccessorIndex();
1536 ConstantInt
*CaseValue
= Case
.getCaseValue();
1538 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEV(CaseValue
));
1539 isl_set
*CaseConditionSet
=
1540 buildConditionSet(ICmpInst::ICMP_EQ
, isl_pw_aff_copy(LHS
), RHS
, Domain
);
1541 ConditionSets
[Idx
] = isl_set_coalesce(
1542 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1545 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1546 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1547 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1549 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1550 ConditionSets
[0] = setDimensionIds(
1551 Domain
, isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
));
1553 isl_pw_aff_free(LHS
);
1558 /// Build condition sets for unsigned ICmpInst(s).
1559 /// Special handling is required for unsigned operands to ensure that if
1560 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1561 /// it should wrap around.
1563 /// @param IsStrictUpperBound holds information on the predicate relation
1564 /// between TestVal and UpperBound, i.e,
1565 /// TestVal < UpperBound OR TestVal <= UpperBound
1566 static __isl_give isl_set
*
1567 buildUnsignedConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1568 __isl_keep isl_set
*Domain
, const SCEV
*SCEV_TestVal
,
1569 const SCEV
*SCEV_UpperBound
,
1570 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1571 bool IsStrictUpperBound
) {
1572 // Do not take NonNeg assumption on TestVal
1573 // as it might have MSB (Sign bit) set.
1574 isl_pw_aff
*TestVal
= getPwAff(S
, BB
, InvalidDomainMap
, SCEV_TestVal
, false);
1575 // Take NonNeg assumption on UpperBound.
1576 isl_pw_aff
*UpperBound
=
1577 getPwAff(S
, BB
, InvalidDomainMap
, SCEV_UpperBound
, true);
1581 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1582 isl_pw_aff_get_domain_space(TestVal
))),
1583 isl_pw_aff_copy(TestVal
));
1586 if (IsStrictUpperBound
)
1587 // TestVal < UpperBound
1588 Second
= isl_pw_aff_lt_set(TestVal
, UpperBound
);
1590 // TestVal <= UpperBound
1591 Second
= isl_pw_aff_le_set(TestVal
, UpperBound
);
1593 isl_set
*ConsequenceCondSet
= isl_set_intersect(First
, Second
);
1594 ConsequenceCondSet
= setDimensionIds(Domain
, ConsequenceCondSet
);
1595 return ConsequenceCondSet
;
1598 /// Build the conditions sets for the branch condition @p Condition in
1601 /// This will fill @p ConditionSets with the conditions under which control
1602 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1603 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1604 /// context under which @p Condition is true/false will be returned as the
1605 /// new elements of @p ConditionSets.
1607 buildConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1608 TerminatorInst
*TI
, Loop
*L
, __isl_keep isl_set
*Domain
,
1609 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1610 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1611 isl_set
*ConsequenceCondSet
= nullptr;
1612 if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1613 if (CCond
->isZero())
1614 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1616 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1617 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1618 auto Opcode
= BinOp
->getOpcode();
1619 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1621 bool Valid
= buildConditionSets(S
, BB
, BinOp
->getOperand(0), TI
, L
, Domain
,
1622 InvalidDomainMap
, ConditionSets
) &&
1623 buildConditionSets(S
, BB
, BinOp
->getOperand(1), TI
, L
, Domain
,
1624 InvalidDomainMap
, ConditionSets
);
1626 while (!ConditionSets
.empty())
1627 isl_set_free(ConditionSets
.pop_back_val());
1631 isl_set_free(ConditionSets
.pop_back_val());
1632 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1633 isl_set_free(ConditionSets
.pop_back_val());
1634 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1636 if (Opcode
== Instruction::And
)
1637 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1639 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1641 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1643 "Condition of exiting branch was neither constant nor ICmp!");
1645 ScalarEvolution
&SE
= *S
.getSE();
1646 isl_pw_aff
*LHS
, *RHS
;
1647 // For unsigned comparisons we assumed the signed bit of neither operand
1648 // to be set. The comparison is equal to a signed comparison under this
1650 bool NonNeg
= ICond
->isUnsigned();
1651 const SCEV
*LeftOperand
= SE
.getSCEVAtScope(ICond
->getOperand(0), L
),
1652 *RightOperand
= SE
.getSCEVAtScope(ICond
->getOperand(1), L
);
1654 switch (ICond
->getPredicate()) {
1655 case ICmpInst::ICMP_ULT
:
1656 ConsequenceCondSet
=
1657 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1658 RightOperand
, InvalidDomainMap
, true);
1660 case ICmpInst::ICMP_ULE
:
1661 ConsequenceCondSet
=
1662 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1663 RightOperand
, InvalidDomainMap
, false);
1665 case ICmpInst::ICMP_UGT
:
1666 ConsequenceCondSet
=
1667 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1668 LeftOperand
, InvalidDomainMap
, true);
1670 case ICmpInst::ICMP_UGE
:
1671 ConsequenceCondSet
=
1672 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1673 LeftOperand
, InvalidDomainMap
, false);
1676 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LeftOperand
, NonNeg
);
1677 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RightOperand
, NonNeg
);
1678 ConsequenceCondSet
=
1679 buildConditionSet(ICond
->getPredicate(), LHS
, RHS
, Domain
);
1684 // If no terminator was given we are only looking for parameter constraints
1685 // under which @p Condition is true/false.
1687 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1688 assert(ConsequenceCondSet
);
1689 ConsequenceCondSet
= isl_set_coalesce(
1690 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1692 isl_set
*AlternativeCondSet
= nullptr;
1694 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1697 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1698 isl_set_copy(ConsequenceCondSet
));
1700 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1704 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc(),
1705 TI
? TI
->getParent() : nullptr /* BasicBlock */);
1706 isl_set_free(AlternativeCondSet
);
1707 isl_set_free(ConsequenceCondSet
);
1711 ConditionSets
.push_back(ConsequenceCondSet
);
1712 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1717 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1719 /// This will fill @p ConditionSets with the conditions under which control
1720 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1721 /// have as many elements as @p TI has successors.
1723 buildConditionSets(Scop
&S
, BasicBlock
*BB
, TerminatorInst
*TI
, Loop
*L
,
1724 __isl_keep isl_set
*Domain
,
1725 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1726 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1727 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1728 return buildConditionSets(S
, BB
, SI
, L
, Domain
, InvalidDomainMap
,
1731 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1733 if (TI
->getNumSuccessors() == 1) {
1734 ConditionSets
.push_back(isl_set_copy(Domain
));
1738 Value
*Condition
= getConditionFromTerminator(TI
);
1739 assert(Condition
&& "No condition for Terminator");
1741 return buildConditionSets(S
, BB
, Condition
, TI
, L
, Domain
, InvalidDomainMap
,
1745 void ScopStmt::buildDomain() {
1746 isl::id Id
= isl::id::alloc(getIslCtx(), getBaseName(), this);
1748 Domain
= getParent()->getDomainConditions(this);
1749 Domain
= Domain
.set_tuple_id(Id
);
1752 void ScopStmt::collectSurroundingLoops() {
1753 for (unsigned u
= 0, e
= Domain
.dim(isl::dim::set
); u
< e
; u
++) {
1754 isl::id DimId
= Domain
.get_dim_id(isl::dim::set
, u
);
1755 NestLoops
.push_back(static_cast<Loop
*>(DimId
.get_user()));
1759 ScopStmt::ScopStmt(Scop
&parent
, Region
&R
, Loop
*SurroundingLoop
)
1760 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), R(&R
),
1761 Build(nullptr), SurroundingLoop(SurroundingLoop
) {
1762 BaseName
= getIslCompatibleName(
1763 "Stmt", R
.getNameStr(), parent
.getNextStmtIdx(), "", UseInstructionNames
);
1766 ScopStmt::ScopStmt(Scop
&parent
, BasicBlock
&bb
, Loop
*SurroundingLoop
,
1767 std::vector
<Instruction
*> Instructions
, int Count
)
1768 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(&bb
),
1769 Build(nullptr), SurroundingLoop(SurroundingLoop
),
1770 Instructions(Instructions
) {
1773 S
+= std::to_string(Count
);
1774 BaseName
= getIslCompatibleName("Stmt", &bb
, parent
.getNextStmtIdx(), S
,
1775 UseInstructionNames
);
// Construct a "copy statement": an artificial statement (no IR block/region)
// that reads from @p SourceRel and writes to @p TargetRel over @p NewDomain.
// Both access relations are re-tagged with this statement's domain id so they
// refer to the new statement's iteration space.
ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  // The write access is registered first, then the read; both are owned by the
  // parent Scop (addAccessFunction takes over the allocation).
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}
// MemoryAccess objects are owned by the parent Scop, so nothing to do here.
ScopStmt::~ScopStmt() = default;
// One-time initialization after construction: build the iteration domain,
// recover the surrounding loop nest from it, build the access relations, and
// (optionally) detect reduction-like access pairs.
void ScopStmt::init(LoopInfo &LI) {
  assert(!Domain && "init must be called only once");

  buildDomain();
  collectSurroundingLoops();
  buildAccessRelations();

  if (DetectReductions)
    checkForReductions();
}
/// Collect loads which might form a reduction chain with @p StoreMA.
///
/// Check if the stored value for @p StoreMA is a binary operator with one or
/// two loads as operands. If the binary operand is commutative & associative,
/// used only once (by @p StoreMA) and its load operands are also used only
/// once, we have found a possible reduction chain. It starts at an operand
/// load and includes the binary operator and @p StoreMA.
///
/// Note: We allow only one use to ensure the load and binary operator cannot
/// escape this block or into any other store except @p StoreMA.
void ScopStmt::collectCandiateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operators has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&getArrayAccessFor(PossibleLoad1));
}
/// Check for reductions in this ScopStmt.
///
/// Iterate over all store memory accesses and check for valid binary reduction
/// like chains. For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reductions candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
void ScopStmt::checkForReductions() {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : MemAccs) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check if the remaining accesses overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : MemAccs) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel = MA->getAccessRelation().intersect_domain(getDomain());
      isl::set Accs = AccRel.range();

      // Only accesses in the same array space can possibly overlap.
      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
// Trivial ScopStmt accessors and string helpers.

// Serialize the iteration domain to isl set notation.
std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }

// Serialize the schedule; returns an empty string if no schedule is set.
std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule().release();
  if (!S)
    return {};
  auto Str = stringFromIslObj(S);
  isl_map_free(S); // getSchedule() transferred ownership to us.
  return Str;
}

void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }

// The entry block of a block statement is the block itself; for a region
// statement it is the region's entry block.
BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

// Loops are stored outermost-first, matching the domain dimensions.
Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

isl::set ScopStmt::getDomain() const { return Domain; }

isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }

isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
// Print the list of IR instructions belonging to this (block) statement,
// one per line, wrapped in an "Instructions { ... }" section.
void ScopStmt::printInstructions(raw_ostream &OS) const {
  OS << "Instructions {\n";

  for (Instruction *Inst : Instructions)
    OS.indent(16) << *Inst << "\n";

  OS.indent(12) << "}\n";
}
// Pretty-print this statement: name, domain, schedule, all memory accesses
// and - for block statements, when requested - the underlying instructions.
// "n/a" is printed for domain/schedule when the domain is not yet built.
void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);

  if (PrintInstructions && isBlockStmt())
    printInstructions(OS.indent(12));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger helper: print the full statement (including instructions) to dbgs().
LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
#endif
// Unregister @p MA from the statement-local lookup tables (value reads/writes
// and PHI reads/writes). Each table is only consulted for the access kinds it
// indexes; the asserts document that the access was expected to be present.
void ScopStmt::removeAccessData(MemoryAccess *MA) {
  if (MA->isRead() && MA->isOriginalValueKind()) {
    bool Found = ValueReads.erase(MA->getAccessValue());
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalValueKind()) {
    bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
}
2028 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
2029 // Remove the memory accesses from this statement together with all scalar
2030 // accesses that were caused by it. MemoryKind::Value READs have no access
2031 // instruction, hence would not be removed by this function. However, it is
2032 // only used for invariant LoadInst accesses, its arguments are always affine,
2033 // hence synthesizable, and therefore there are no MemoryKind::Value READ
2034 // accesses to be removed.
2035 auto Predicate
= [&](MemoryAccess
*Acc
) {
2036 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
2038 for (auto *MA
: MemAccs
) {
2039 if (Predicate(MA
)) {
2040 removeAccessData(MA
);
2041 Parent
.removeAccessData(MA
);
2044 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
2046 InstructionToAccess
.erase(MA
->getAccessInstruction());
// Remove exactly one access @p MA from this statement (unlike
// removeMemoryAccess, sibling accesses for the same instruction survive).
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
  assert(MAIt != MemAccs.end());
  MemAccs.erase(MAIt);

  removeAccessData(MA);
  Parent.removeAccessData(MA);

  // Drop MA from the per-instruction access list; erase the whole map entry
  // once the list becomes empty.
  auto It = InstructionToAccess.find(MA->getAccessInstruction());
  if (It != InstructionToAccess.end()) {
    It->second.remove(MA);
    if (It->second.empty())
      InstructionToAccess.erase(MA->getAccessInstruction());
  }
}
// Return the input (read) access for value @p V, creating a new
// MemoryKind::Value READ access (plus its ScopArrayInfo) if none exists yet.
// Ownership of the new access is transferred to the parent Scop.
MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
  MemoryAccess *Access = lookupInputAccessOf(V);
  if (Access)
    return Access;

  ScopArrayInfo *SAI =
      Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
  Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
                            true, {}, {}, V, MemoryKind::Value);
  Parent.addAccessFunction(Access);
  Access->buildAccessRelation(SAI);
  addAccess(Access);
  Parent.addAccessData(Access);
  return Access;
}
// Stream a ScopStmt; whether instructions are included follows the
// -polly-print-instructions command line option.
raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
  S.print(OS, PollyPrintInstructions);
  return OS;
}
2086 //===----------------------------------------------------------------------===//
/// Scop class implementation
// Replace the Scop's context set. The new context is first aligned to the
// parameter space of the current context; the old context is freed (raw isl
// C API, manual reference counting).
void Scop::setContext(__isl_take isl_set *NewContext) {
  NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
  isl_set_free(Context);
  Context = NewContext;
}
namespace {

/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  const ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
                                 ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  /// Convenience entry point: rewrite @p E under @p VMap in one call.
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             const ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  // Rebuild the AddRec with a zero start and add the (rewritten) start back as
  // a separate term, so the recurrence stays well-formed after remapping.
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  // Replace a value by its class representative if the map knows it.
  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

/// Check whether we should remap a SCEV expression.
struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
  const ValueToValueMap &VMap;
  bool FoundInside = false;
  const Scop *S;

public:
  SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
                     const Scop *S)
      : SCEVTraversal(*this), VMap(VMap), S(S) {}

  /// Return true iff @p E references something variant inside @p S that is
  /// not covered by the invariant-load equivalence map @p VMap.
  static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
                         const ValueToValueMap &VMap, const Scop *S) {
    SCEVFindInsideScop SFIS(VMap, SE, S);
    SFIS.visitAll(E);
    return SFIS.FoundInside;
  }

  bool follow(const SCEV *E) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
      FoundInside |= S->getRegion().contains(AddRec->getLoop());
    } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
      if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
        FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
    }
    // Stop descending as soon as something variant was found.
    return !FoundInside;
  }

  bool isDone() { return FoundInside; }
};
} // end anonymous namespace
// Normalize @p E so that equivalent invariant loads map to one representative
// SCEV. If the expression depends on something variant inside the SCoP the
// rewrite is skipped and E is returned unchanged.
const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
  // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
  // doesn't like addition between an AddRec and an expression that
  // doesn't have a dominance relationship with it.)
  if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
    return E;

  return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}
// This table of function names is used to translate parameter names in more
// human-readable names. This makes it easier to interpret Polly analysis
// results. The keys are mangled OpenCL work-item builtin names.
StringMap<std::string> KnownNames = {
    {"_Z13get_global_idj", "global_id"},
    {"_Z12get_local_idj", "local_id"},
    {"_Z15get_global_sizej", "global_size"},
    {"_Z14get_local_sizej", "local_size"},
    {"_Z12get_work_dimv", "work_dim"},
    {"_Z17get_global_offsetj", "global_offset"},
    {"_Z12get_group_idj", "group_id"},
    {"_Z14get_num_groupsj", "num_groups"},
};
// Build a readable parameter name for a (constant) call: the callee name
// (translated via KnownNames when known) followed by "_<value>" for each
// constant-integer argument.
static std::string getCallParamName(CallInst *Call) {
  std::string Result;
  raw_string_ostream OS(Result);
  std::string Name = Call->getCalledFunction()->getName();

  auto Iterator = KnownNames.find(Name);
  if (Iterator != KnownNames.end())
    Name = "__" + Iterator->getValue();
  OS << Name;
  for (auto &Operand : Call->arg_operands()) {
    // Callers guarantee constant arguments (isConstCall); hence the cast.
    ConstantInt *Op = cast<ConstantInt>(&Operand);
    OS << "_" << Op->getValue();
  }

  return OS.str();
}
// Create and cache an isl id for @p Parameter. The id's user pointer is the
// SCEV itself so it can be recovered from isl sets later. The name defaults
// to "p_<index>" but is improved from the underlying IR value when possible.
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();
    CallInst *Call = dyn_cast<CallInst>(Val);

    if (Call && isConstCall(Call)) {
      ParameterName = getCallParamName(Call);
    } else if (UseInstructionNames) {
      // If this parameter references a specific Value and this value has a name
      // we use this name as it is likely to be unique and more useful than just
      // a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        // For unnamed loads, derive a name from the loaded pointer's origin.
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }

    ParameterName = getIslCompatibleName("", ParameterName, "");
  }

  isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
                              const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}
// Register the given parameters with this Scop, normalizing each one first
// and creating an isl id for every parameter seen for the first time.
void Scop::addParams(const ParameterSetTy &NewParameters) {
  for (const SCEV *Parameter : NewParameters) {
    // Normalize the SCEV to get the representing element for an invariant
    // load: strip constant factors, then map to the equivalence-class
    // representative.
    Parameter = extractConstantFactor(Parameter, *SE).second;
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);

    if (Parameters.insert(Parameter))
      createParameterId(Parameter);
  }
}
// Look up the isl id for @p Parameter; returns a null id when the parameter
// is unknown. The same normalization as in addParams is applied first.
isl::id Scop::getIdForParam(const SCEV *Parameter) const {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return ParameterIds.lookup(Parameter);
}
// Intersect @p C with the parameter constraints implied by "at least one
// statement instance executes" (the parameter projection of all domains).
isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains().release());
  return isl::manage(isl_set_intersect_params(C.release(), DomainContext));
}
2261 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2262 return DT
.dominates(BB
, getEntry());
// Incorporate llvm.assume calls (from @p AC) that affect this SCoP into the
// context. Only single-argument, affine assumptions are used; assumptions
// inside the SCoP restrict the block's domain, assumptions dominating the
// SCoP restrict the global context. Newly introduced parameters are projected
// out again since they carry no information for later passes.
void Scop::addUserAssumptions(
    AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    bool InScop = contains(CI);
    // Assumptions neither inside the SCoP nor dominating it are irrelevant.
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
    auto *Dom = InScop ? DomainMap[BB].copy() : isl_set_copy(Context);
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
                                    InvalidDomainMap, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      // Inside the SCoP only the parameter constraints of the "assumption
      // holds" branch matter; its complement is what must be assumed away.
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise
    // useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    Context = isl_set_intersect(Context, AssumptionCtx);
  }
}
// Intersect the context with the set provided via -polly-context. The user
// set must match the computed parameter space in dimensionality and dimension
// names; on any mismatch a diagnostic is printed and the option is ignored.
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
  isl_space *Space = getParamSpace().release();
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    free(SpaceStr);
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);

    if (strcmp(NameContext, NameUserContext) != 0) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    // Names match: carry the computed dimension ids over to the user set so
    // the intersection below aligns the dimensions correctly.
    UserContext = isl_set_set_dim_id(UserContext, isl_dim_param, i,
                                     isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = isl_set_intersect(Context, UserContext);
  isl_space_free(Space);
}
// Group the required invariant loads into equivalence classes keyed by
// (pointer SCEV, loaded type). The first load of a class becomes its
// representative; all later loads map to that representative via
// InvEquivClassVMap so SCEVs mentioning them can be normalized.
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      // Class already has a representative; just record the mapping.
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
// Initialize the three context sets over an (initially) zero-parameter space:
// Context and AssumedContext start as the universe, InvalidContext as empty.
void Scop::buildContext() {
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
  Context = isl_set_universe(isl_space_copy(Space));
  InvalidContext = isl_set_empty(isl_space_copy(Space));
  AssumedContext = isl_set_universe(Space);
}
// Tighten the context with the signed value range ScalarEvolution knows for
// each parameter, one parameter dimension at a time.
void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    Context =
        addRangeBoundsToSet(give(Context), SRange, PDim++, isl::dim::param)
            .release();
  }
}
// Collect the parameter ids that describe the outermost dimension size of
// each (Fortran) array in @p Arrays.
static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
  std::vector<isl::id> OutermostSizeIds;
  for (auto Array : Arrays) {
    // To check if an array is a Fortran array, we check if it has a isl_pw_aff
    // for its outermost dimension. Fortran arrays will have this since the
    // outermost dimension size can be picked up from their runtime description.
    // TODO: actually need to check if it has a FAD, but for now this works.
    if (Array->getNumberOfDimensions() > 0) {
      isl::pw_aff PwAff = Array->getDimensionSizePw(0);
      if (!PwAff)
        continue;

      isl::id Id =
          isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0));
      assert(!Id.is_null() &&
             "Invalid Id for PwAff expression in Fortran array");
      OutermostSizeIds.push_back(Id);
    }
  }
  return OutermostSizeIds;
}
// The FORTRAN array size parameters are known to be non-negative.
// Add a lower bound of 0 for each such parameter dimension of @p Context.
static isl_set *boundFortranArrayParams(__isl_give isl_set *Context,
                                        Scop::array_range Arrays) {
  std::vector<isl::id> OutermostSizeIds;
  OutermostSizeIds = getFortranArrayIds(Arrays);

  for (isl::id Id : OutermostSizeIds) {
    int dim = isl_set_find_dim_by_id(Context, isl_dim_param, Id.get());
    Context = isl_set_lower_bound_si(Context, isl_dim_param, dim, 0);
  }

  return Context;
}
// Align all isl objects of this Scop to one common parameter space and add
// parameter bounds. Skipped entirely when -polly-ignore-parameter-bounds is
// set.
void Scop::realignParams() {
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl::space Space = getFullParamSpace();

  // Align the parameters of all data structures to the model.
  Context = isl_set_align_params(Context, Space.copy());

  // Bound the size of the fortran array dimensions.
  Context = boundFortranArrayParams(Context, arrays());

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();
  // Simplify the schedule according to the context too.
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext().release());
}
// Gist (simplify) @p AssumptionContext under the constraints already implied
// by the Scop's domains (when safe) and its context set.
static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    isl_set *DomainParameters = isl_union_set_params(S.getDomains().release());
    AssumptionContext =
        isl_set_gist_params(AssumptionContext, DomainParameters);
  }

  AssumptionContext =
      isl_set_gist_params(AssumptionContext, S.getContext().release());
  return AssumptionContext;
}
// Simplify the assumed context relative to domains/context, and align the
// invalid context's parameters to the Scop's parameter space.
void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i * p + j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext =
      isl_set_align_params(InvalidContext, getParamSpace().release());
}
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// Bails out with isl::stat::error when the computation would be too
/// expensive (too many disjuncts or parameters) or when isl hit its
/// computation quota.
static isl::stat
buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;
  isl::ctx Ctx = Set.get_ctx();

  Set = Set.remove_divs();

  if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
    return isl::stat::error;

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return isl::stat::error;
  }

  if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
    return isl::stat::error;

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  // isl may abort the lexmin/lexmax computation when it exceeds its quota.
  if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
    return isl::stat::error;

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  MinMaxAccesses.push_back(std::make_pair(MinPMA.copy(), MaxPMA.copy()));

  return isl::stat::ok;
}
// Return the parameter-only domain of @p MA's statement: all set dimensions
// are projected out and the tuple id is reset.
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain().release();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}
/// Wrapper function to calculate minimal/maximal accesses to each array.
/// Returns false when any per-set min/max computation failed.
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = S.getDomains();
  isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.add_map(give(MA->getAccessRelation().release()));

  // Restrict to actually executed instances, then take the accessed
  // locations and simplify them before computing min/max per set.
  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();
  Locations = Locations.coalesce();
  Locations = Locations.detect_equalities();

  auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
    return buildMinMaxAccess(Set, MinMaxAccesses, S);
  };
  return Locations.foreach_set(Lambda) == isl::stat::ok;
}
/// Helper to treat non-affine regions and basic blocks the same.
///
/// @{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    // A sub-region has a single successor: its exit block.
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (i > 1024)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  // For a non-affine sub-region, the surrounding loop is the innermost loop
  // of the entry block that is NOT fully contained in the sub-region.
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();

  return L;
}
/// Get the number of blocks in @p L.
///
/// The number of blocks in a loop are the number of basic blocks actually
/// belonging to the loop, as well as all single basic blocks that the loop
/// exits to and which terminate in an unreachable instruction. We do not
/// allow such basic blocks in the exit of a scop, hence they belong to the
/// scop and represent run-time conditions which we want to model and
/// subsequently speculate away.
///
/// @see getRegionNodeLoop for additional details.
unsigned getNumBlocksInLoop(Loop *L) {
  unsigned NumBlocks = L->getNumBlocks();
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  for (auto ExitBlock : ExitBlocks) {
    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
      NumBlocks++;
  }
  return NumBlocks;
}
// Number of blocks a region node represents: one for a plain basic block,
// the block count of the sub-region otherwise.
static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
  if (!RN->isSubRegion())
    return 1;

  Region *R = RN->getNodeAs<Region>();
  return std::distance(R->block_begin(), R->block_end());
}
// Return true if @p RN (a block or a sub-region) contains at least one
// error block with respect to region @p R.
static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}
// Attach loop @p L as the user pointer of set dimension @p Dim of @p Domain
// (so collectSurroundingLoops can recover it later), and give the dimension a
// lower bound of -1 to keep the set bounded from below.
static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
                                                 unsigned Dim, Loop *L) {
  Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
  isl_id *DimId =
      isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
  return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
}
2737 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2738 return getDomainConditions(Stmt
->getEntryBlock());
2741 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2742 auto DIt
= DomainMap
.find(BB
);
2743 if (DIt
!= DomainMap
.end())
2744 return DIt
->getSecond();
2746 auto &RI
= *R
.getRegionInfo();
2747 auto *BBR
= RI
.getRegionFor(BB
);
2748 while (BBR
->getEntry() == BB
)
2749 BBR
= BBR
->getParent();
2750 return getDomainConditions(BBR
->getEntry());
2753 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2754 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2755 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2756 auto *EntryBB
= R
->getEntry();
2757 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2758 int LD
= getRelativeLoopDepth(L
);
2759 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD
+ 1));
2762 S
= addDomainDimId(S
, LD
+ 1, L
);
2763 L
= L
->getParentLoop();
2766 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2767 DomainMap
[EntryBB
] = isl::manage(S
);
2769 if (IsOnlyNonAffineRegion
)
2770 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2772 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2775 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2778 // Error blocks and blocks dominated by them have been assumed to never be
2779 // executed. Representing them in the Scop does not add any value. In fact,
2780 // it is likely to cause issues during construction of the ScopStmts. The
2781 // contents of error blocks have not been verified to be expressible and
2782 // will cause problems when building up a ScopStmt for them.
2783 // Furthermore, basic blocks dominated by error blocks may reference
2784 // instructions in the error block which, if the error block is not modeled,
2785 // can themselves not be constructed properly. To this end we will replace
2786 // the domains of error blocks and those only reachable via error blocks
2787 // with an empty set. Additionally, we will record for each block under which
2788 // parameter combination it would be reached via an error block in its
2789 // InvalidDomain. This information is needed during load hoisting.
2790 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
2796 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2797 /// to be compatible to domains constructed for loop @p NewL.
2799 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2800 /// edge from @p OldL to @p NewL.
2801 static __isl_give isl_set
*adjustDomainDimensions(Scop
&S
,
2802 __isl_take isl_set
*Dom
,
2803 Loop
*OldL
, Loop
*NewL
) {
2804 // If the loops are the same there is nothing to do.
2808 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2809 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2810 // If both loops are non-affine loops there is nothing to do.
2811 if (OldDepth
== -1 && NewDepth
== -1)
2814 // Distinguish three cases:
2815 // 1) The depth is the same but the loops are not.
2816 // => One loop was left one was entered.
2817 // 2) The depth increased from OldL to NewL.
2818 // => One loop was entered, none was left.
2819 // 3) The depth decreased from OldL to NewL.
2820 // => Loops were left were difference of the depths defines how many.
2821 if (OldDepth
== NewDepth
) {
2822 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2823 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NewDepth
, 1);
2824 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2825 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2826 } else if (OldDepth
< NewDepth
) {
2827 assert(OldDepth
+ 1 == NewDepth
);
2828 auto &R
= S
.getRegion();
2830 assert(NewL
->getParentLoop() == OldL
||
2831 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2832 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2833 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2835 assert(OldDepth
> NewDepth
);
2836 int Diff
= OldDepth
- NewDepth
;
2837 int NumDim
= isl_set_n_dim(Dom
);
2838 assert(NumDim
>= Diff
);
2839 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NumDim
- Diff
, Diff
);
2845 bool Scop::propagateInvalidStmtDomains(
2846 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2847 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2848 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2849 for (auto *RN
: RTraversal
) {
2851 // Recurse for affine subregions but go on for basic blocks and non-affine
2853 if (RN
->isSubRegion()) {
2854 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2855 if (!isNonAffineSubRegion(SubRegion
)) {
2856 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2861 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2862 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2863 isl::set
&Domain
= DomainMap
[BB
];
2864 assert(Domain
&& "Cannot propagate a nullptr");
2866 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2868 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2870 if (!IsInvalidBlock
) {
2871 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2873 InvalidDomain
= Domain
;
2874 isl::set DomPar
= Domain
.params();
2875 recordAssumption(ERRORBLOCK
, DomPar
.release(),
2876 BB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
2880 if (InvalidDomain
.is_empty()) {
2881 InvalidDomainMap
[BB
] = InvalidDomain
;
2885 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2886 auto *TI
= BB
->getTerminator();
2887 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2888 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2889 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2891 // Skip successors outside the SCoP.
2892 if (!contains(SuccBB
))
2896 if (DT
.dominates(SuccBB
, BB
))
2899 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2901 auto *AdjustedInvalidDomain
= adjustDomainDimensions(
2902 *this, InvalidDomain
.copy(), BBLoop
, SuccBBLoop
);
2904 auto *SuccInvalidDomain
= InvalidDomainMap
[SuccBB
].copy();
2906 isl_set_union(SuccInvalidDomain
, AdjustedInvalidDomain
);
2907 SuccInvalidDomain
= isl_set_coalesce(SuccInvalidDomain
);
2908 unsigned NumConjucts
= isl_set_n_basic_set(SuccInvalidDomain
);
2910 InvalidDomainMap
[SuccBB
] = isl::manage(SuccInvalidDomain
);
2912 // Check if the maximal number of domain disjunctions was reached.
2913 // In case this happens we will bail.
2914 if (NumConjucts
< MaxDisjunctsInDomain
)
2917 InvalidDomainMap
.erase(BB
);
2918 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2922 InvalidDomainMap
[BB
] = InvalidDomain
;
2928 void Scop::propagateDomainConstraintsToRegionExit(
2929 BasicBlock
*BB
, Loop
*BBLoop
,
2930 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2931 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2932 // Check if the block @p BB is the entry of a region. If so we propagate it's
2933 // domain to the exit block of the region. Otherwise we are done.
2934 auto *RI
= R
.getRegionInfo();
2935 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2936 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2937 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2940 // Do not propagate the domain if there is a loop backedge inside the region
2941 // that would prevent the exit block from being executed.
2943 while (L
&& contains(L
)) {
2944 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2945 BBLoop
->getLoopLatches(LatchBBs
);
2946 for (auto *LatchBB
: LatchBBs
)
2947 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2949 L
= L
->getParentLoop();
2952 isl::set Domain
= DomainMap
[BB
];
2953 assert(Domain
&& "Cannot propagate a nullptr");
2955 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2957 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2958 // adjust the domain before we can propagate it.
2959 isl::set AdjustedDomain
= isl::manage(
2960 adjustDomainDimensions(*this, Domain
.copy(), BBLoop
, ExitBBLoop
));
2961 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2963 // If the exit domain is not yet created we set it otherwise we "add" the
2965 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2967 // Initialize the invalid domain.
2968 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2970 FinishedExitBlocks
.insert(ExitBB
);
2973 bool Scop::buildDomainsWithBranchConstraints(
2974 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2975 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2976 // To create the domain for each block in R we iterate over all blocks and
2977 // subregions in R and propagate the conditions under which the current region
2978 // element is executed. To this end we iterate in reverse post order over R as
2979 // it ensures that we first visit all predecessors of a region node (either a
2980 // basic block or a subregion) before we visit the region node itself.
2981 // Initially, only the domain for the SCoP region entry block is set and from
2982 // there we propagate the current domain to all successors, however we add the
2983 // condition that the successor is actually executed next.
2984 // As we are only interested in non-loop carried constraints here we can
2985 // simply skip loop back edges.
2987 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2988 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2989 for (auto *RN
: RTraversal
) {
2990 // Recurse for affine subregions but go on for basic blocks and non-affine
2992 if (RN
->isSubRegion()) {
2993 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2994 if (!isNonAffineSubRegion(SubRegion
)) {
2995 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
3002 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
3003 HasErrorBlock
= true;
3005 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
3006 TerminatorInst
*TI
= BB
->getTerminator();
3008 if (isa
<UnreachableInst
>(TI
))
3011 isl::set Domain
= DomainMap
.lookup(BB
);
3014 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
3016 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
3017 // Propagate the domain from BB directly to blocks that have a superset
3018 // domain, at the moment only region exit nodes of regions that start in BB.
3019 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
3022 // If all successors of BB have been set a domain through the propagation
3023 // above we do not need to build condition sets but can just skip this
3024 // block. However, it is important to note that this is a local property
3025 // with regards to the region @p R. To this end FinishedExitBlocks is a
3027 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
3028 return FinishedExitBlocks
.count(SuccBB
);
3030 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
3033 // Build the condition sets for the successor nodes of the current region
3034 // node. If it is a non-affine subregion we will always execute the single
3035 // exit node, hence the single entry node domain is the condition set. For
3036 // basic blocks we use the helper function buildConditionSets.
3037 SmallVector
<isl_set
*, 8> ConditionSets
;
3038 if (RN
->isSubRegion())
3039 ConditionSets
.push_back(Domain
.copy());
3040 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
3041 InvalidDomainMap
, ConditionSets
))
3044 // Now iterate over the successors and set their initial domain based on
3045 // their condition set. We skip back edges here and have to be careful when
3046 // we leave a loop not to keep constraints over a dimension that doesn't
3048 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
3049 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
3050 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
3051 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
3053 // Skip blocks outside the region.
3054 if (!contains(SuccBB
))
3057 // If we propagate the domain of some block to "SuccBB" we do not have to
3058 // adjust the domain.
3059 if (FinishedExitBlocks
.count(SuccBB
))
3063 if (DT
.dominates(SuccBB
, BB
))
3066 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
3068 CondSet
= isl::manage(
3069 adjustDomainDimensions(*this, CondSet
.copy(), BBLoop
, SuccBBLoop
));
3071 // Set the domain for the successor or merge it with an existing domain in
3072 // case there are multiple paths (without loop back edges) to the
3074 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
3077 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
3079 // Initialize the invalid domain.
3080 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
3081 SuccDomain
= CondSet
;
3084 SuccDomain
= SuccDomain
.detect_equalities();
3086 // Check if the maximal number of domain disjunctions was reached.
3087 // In case this happens we will clean up and bail.
3088 if (isl_set_n_basic_set(SuccDomain
.get()) < MaxDisjunctsInDomain
)
3091 invalidate(COMPLEXITY
, DebugLoc());
3092 while (++u
< ConditionSets
.size())
3093 isl_set_free(ConditionSets
[u
]);
3101 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
3104 // If @p BB is the ScopEntry we are done
3105 if (R
.getEntry() == BB
)
3106 return isl::set::universe(Domain
.get_space());
3108 // The region info of this function.
3109 auto &RI
= *R
.getRegionInfo();
3111 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
3113 // A domain to collect all predecessor domains, thus all conditions under
3114 // which the block is executed. To this end we start with the empty domain.
3115 isl::set PredDom
= isl::set::empty(Domain
.get_space());
3117 // Set of regions of which the entry block domain has been propagated to BB.
3118 // all predecessors inside any of the regions can be skipped.
3119 SmallSet
<Region
*, 8> PropagatedRegions
;
3121 for (auto *PredBB
: predecessors(BB
)) {
3123 if (DT
.dominates(BB
, PredBB
))
3126 // If the predecessor is in a region we used for propagation we can skip it.
3127 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
3128 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
3133 // Check if there is a valid region we can use for propagation, thus look
3134 // for a region that contains the predecessor and has @p BB as exit block.
3135 auto *PredR
= RI
.getRegionFor(PredBB
);
3136 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
3139 // If a valid region for propagation was found use the entry of that region
3140 // for propagation, otherwise the PredBB directly.
3141 if (PredR
->getExit() == BB
) {
3142 PredBB
= PredR
->getEntry();
3143 PropagatedRegions
.insert(PredR
);
3146 auto *PredBBDom
= getDomainConditions(PredBB
).release();
3147 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
3149 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
3151 PredDom
= PredDom
.unite(isl::manage(PredBBDom
));
3157 bool Scop::propagateDomainConstraints(
3158 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
3159 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
3160 // Iterate over the region R and propagate the domain constrains from the
3161 // predecessors to the current node. In contrast to the
3162 // buildDomainsWithBranchConstraints function, this one will pull the domain
3163 // information from the predecessors instead of pushing it to the successors.
3164 // Additionally, we assume the domains to be already present in the domain
3165 // map here. However, we iterate again in reverse post order so we know all
3166 // predecessors have been visited before a block or non-affine subregion is
3169 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
3170 for (auto *RN
: RTraversal
) {
3171 // Recurse for affine subregions but go on for basic blocks and non-affine
3173 if (RN
->isSubRegion()) {
3174 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
3175 if (!isNonAffineSubRegion(SubRegion
)) {
3176 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
3182 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
3183 isl::set
&Domain
= DomainMap
[BB
];
3186 // Under the union of all predecessor conditions we can reach this block.
3187 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
3188 Domain
= Domain
.intersect(PredDom
).coalesce();
3189 Domain
= Domain
.align_params(getParamSpace());
3191 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
3192 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
3193 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
3200 /// Create a map to map from a given iteration to a subsequent iteration.
3202 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3203 /// is incremented by one and all other dimensions are equal, e.g.,
3204 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3206 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3207 static __isl_give isl_map
*
3208 createNextIterationMap(__isl_take isl_space
*SetSpace
, unsigned Dim
) {
3209 auto *MapSpace
= isl_space_map_from_set(SetSpace
);
3210 auto *NextIterationMap
= isl_map_universe(isl_space_copy(MapSpace
));
3211 for (unsigned u
= 0; u
< isl_map_dim(NextIterationMap
, isl_dim_in
); u
++)
3214 isl_map_equate(NextIterationMap
, isl_dim_in
, u
, isl_dim_out
, u
);
3215 auto *C
= isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace
));
3216 C
= isl_constraint_set_constant_si(C
, 1);
3217 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, Dim
, 1);
3218 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, Dim
, -1);
3219 NextIterationMap
= isl_map_add_constraint(NextIterationMap
, C
);
3220 return NextIterationMap
;
3223 bool Scop::addLoopBoundsToHeaderDomain(
3224 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
3225 int LoopDepth
= getRelativeLoopDepth(L
);
3226 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
3228 BasicBlock
*HeaderBB
= L
->getHeader();
3229 assert(DomainMap
.count(HeaderBB
));
3230 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
3232 isl::map NextIterationMap
= isl::manage(
3233 createNextIterationMap(HeaderBBDom
.get_space().release(), LoopDepth
));
3235 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
3237 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
3238 L
->getLoopLatches(LatchBlocks
);
3240 for (BasicBlock
*LatchBB
: LatchBlocks
) {
3241 // If the latch is only reachable via error statements we skip it.
3242 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
3246 isl::set BackedgeCondition
= nullptr;
3248 TerminatorInst
*TI
= LatchBB
->getTerminator();
3249 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
3250 assert(BI
&& "Only branch instructions allowed in loop latches");
3252 if (BI
->isUnconditional())
3253 BackedgeCondition
= LatchBBDom
;
3255 SmallVector
<isl_set
*, 8> ConditionSets
;
3256 int idx
= BI
->getSuccessor(0) != HeaderBB
;
3257 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
3258 InvalidDomainMap
, ConditionSets
))
3261 // Free the non back edge condition set as we do not need it.
3262 isl_set_free(ConditionSets
[1 - idx
]);
3264 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
3267 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3268 assert(LatchLoopDepth
>= LoopDepth
);
3269 BackedgeCondition
= BackedgeCondition
.project_out(
3270 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3271 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
3274 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3275 for (int i
= 0; i
< LoopDepth
; i
++)
3276 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3278 isl::set UnionBackedgeConditionComplement
=
3279 UnionBackedgeCondition
.complement();
3280 UnionBackedgeConditionComplement
=
3281 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3283 UnionBackedgeConditionComplement
=
3284 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3285 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3286 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
3288 auto Parts
= partitionSetParts(HeaderBBDom
.copy(), LoopDepth
);
3289 HeaderBBDom
= isl::manage(Parts
.second
);
3291 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3292 // the bounded assumptions to the context as they are already implied by the
3294 if (Affinator
.hasNSWAddRecForLoop(L
)) {
3295 isl_set_free(Parts
.first
);
3299 isl_set
*UnboundedCtx
= isl_set_params(Parts
.first
);
3300 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3301 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
3305 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3306 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3308 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3309 if (!PointerBaseInst
)
3312 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3316 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
3319 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3320 isl::union_map Writes
) {
3321 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3322 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3325 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3326 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3327 if (!isa
<LoadInst
>(BasePtrInst
))
3328 return contains(BasePtrInst
);
3333 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3334 if (!PollyUseRuntimeAliasChecks
)
3337 if (buildAliasGroups(AA
)) {
3338 // Aliasing assumptions do not go through addAssumption but we still want to
3339 // collect statistics so we do it here explicitly.
3340 if (MinMaxAliasGroups
.size())
3341 AssumptionsAliasing
++;
3345 // If a problem occurs while building the alias groups we need to delete
3346 // this SCoP and pretend it wasn't valid in the first place. To this end
3347 // we make the assumed context infeasible.
3348 invalidate(ALIASING
, DebugLoc());
3350 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3351 << " could not be created as the number of parameters involved "
3352 "is too high. The SCoP will be "
3353 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3354 "the maximal number of parameters but be advised that the "
3355 "compile time might increase exponentially.\n\n");
3359 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3360 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3361 AliasSetTracker
AST(AA
);
3363 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3364 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3365 for (ScopStmt
&Stmt
: *this) {
3367 isl_set
*StmtDomain
= Stmt
.getDomain().release();
3368 bool StmtDomainEmpty
= isl_set_is_empty(StmtDomain
);
3369 isl_set_free(StmtDomain
);
3371 // Statements with an empty domain will never be executed.
3372 if (StmtDomainEmpty
)
3375 for (MemoryAccess
*MA
: Stmt
) {
3376 if (MA
->isScalarKind())
3379 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3380 MemAccInst
Acc(MA
->getAccessInstruction());
3381 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3382 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3384 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
3389 AliasGroupVectorTy AliasGroups
;
3390 for (AliasSet
&AS
: AST
) {
3391 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3395 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3398 AliasGroups
.push_back(std::move(AG
));
3401 return std::make_tuple(AliasGroups
, HasWriteAccess
);
3404 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3405 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3407 AliasGroupTy
&AG
= AliasGroups
[u
];
3408 AliasGroupTy::iterator AGI
= AG
.begin();
3409 isl_set
*AGDomain
= getAccessDomain(*AGI
);
3410 while (AGI
!= AG
.end()) {
3411 MemoryAccess
*MA
= *AGI
;
3412 isl_set
*MADomain
= getAccessDomain(MA
);
3413 if (isl_set_is_disjoint(AGDomain
, MADomain
)) {
3414 NewAG
.push_back(MA
);
3415 AGI
= AG
.erase(AGI
);
3416 isl_set_free(MADomain
);
3418 AGDomain
= isl_set_union(AGDomain
, MADomain
);
3422 if (NewAG
.size() > 1)
3423 AliasGroups
.push_back(std::move(NewAG
));
3424 isl_set_free(AGDomain
);
3428 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3429 // To create sound alias checks we perform the following steps:
3430 // o) We partition each group into read only and non read only accesses.
3431 // o) For each group with more than one base pointer we then compute minimal
3432 // and maximal accesses to each array of a group in read only and non
3433 // read only partitions separately.
3434 AliasGroupVectorTy AliasGroups
;
3435 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3437 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3439 splitAliasGroupsByDomain(AliasGroups
);
3441 for (AliasGroupTy
&AG
: AliasGroups
) {
3442 if (!hasFeasibleRuntimeContext())
3446 IslMaxOperationsGuard
MaxOpGuard(getIslCtx(), OptComputeOut
);
3447 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3451 if (isl_ctx_last_error(getIslCtx()) == isl_error_quota
) {
3452 invalidate(COMPLEXITY
, DebugLoc());
3460 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3461 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3462 AliasGroupTy ReadOnlyAccesses
;
3463 AliasGroupTy ReadWriteAccesses
;
3464 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3465 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3467 if (AliasGroup
.size() < 2)
3470 for (MemoryAccess
*Access
: AliasGroup
) {
3471 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3472 Access
->getAccessInstruction())
3473 << "Possibly aliasing pointer, use restrict keyword.");
3474 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3475 if (HasWriteAccess
.count(Array
)) {
3476 ReadWriteArrays
.insert(Array
);
3477 ReadWriteAccesses
.push_back(Access
);
3479 ReadOnlyArrays
.insert(Array
);
3480 ReadOnlyAccesses
.push_back(Access
);
3484 // If there are no read-only pointers, and less than two read-write pointers,
3485 // no alias check is needed.
3486 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3489 // If there is no read-write pointer, no alias check is needed.
3490 if (ReadWriteArrays
.empty())
3493 // For non-affine accesses, no alias check can be generated as we cannot
3494 // compute a sufficiently tight lower and upper bound: bail out.
3495 for (MemoryAccess
*MA
: AliasGroup
) {
3496 if (!MA
->isAffine()) {
3497 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3498 MA
->getAccessInstruction()->getParent());
3503 // Ensure that for all memory accesses for which we generate alias checks,
3504 // their base pointers are available.
3505 for (MemoryAccess
*MA
: AliasGroup
) {
3506 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3507 addRequiredInvariantLoad(
3508 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3511 MinMaxAliasGroups
.emplace_back();
3512 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3513 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3514 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3519 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3524 // Bail out if the number of values we need to compare is too large.
3525 // This is important as the number of comparisons grows quadratically with
3526 // the number of values we need to compare.
3527 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3528 RunTimeChecksMaxArraysPerGroup
)
3532 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3540 /// Get the smallest loop that contains @p S but is not in @p S.
3541 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3542 // Start with the smallest loop containing the entry and expand that
3543 // loop until it contains all blocks in the region. If there is a loop
3544 // containing all blocks in the region check if it is itself contained
3545 // and if so take the parent loop as it will be the smallest containing
3546 // the region but not contained by it.
3547 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3549 bool AllContained
= true;
3550 for (auto *BB
: S
.blocks())
3551 AllContained
&= L
->contains(BB
);
3554 L
= L
->getParentLoop();
3557 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
3560 int Scop::NextScopID
= 0;
3562 std::string
Scop::CurrentFunc
;
3564 int Scop::getNextID(std::string ParentFunc
) {
3565 if (ParentFunc
!= CurrentFunc
) {
3566 CurrentFunc
= ParentFunc
;
3569 return NextScopID
++;
3572 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3573 ScopDetection::DetectionContext
&DC
, OptimizationRemarkEmitter
&ORE
)
3574 : SE(&ScalarEvolution
), R(R
), name(R
.getNameStr()),
3575 HasSingleExitEdge(R
.getExitingBlock()), DC(DC
), ORE(ORE
),
3576 IslCtx(isl_ctx_alloc(), isl_ctx_free
), Affinator(this, LI
),
3577 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3578 if (IslOnErrorAbort
)
3579 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT
);
3584 isl_set_free(Context
);
3585 isl_set_free(AssumedContext
);
3586 isl_set_free(InvalidContext
);
3587 isl_schedule_free(Schedule
);
3589 ParameterIds
.clear();
3591 for (auto &AS
: RecordedAssumptions
)
3592 isl_set_free(AS
.Set
);
3594 // Free the alias groups
3595 for (MinMaxVectorPairTy
&MinMaxAccessPair
: MinMaxAliasGroups
) {
3596 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.first
) {
3597 isl_pw_multi_aff_free(MMA
.first
);
3598 isl_pw_multi_aff_free(MMA
.second
);
3600 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.second
) {
3601 isl_pw_multi_aff_free(MMA
.first
);
3602 isl_pw_multi_aff_free(MMA
.second
);
3606 for (const auto &IAClass
: InvariantEquivClasses
)
3607 isl_set_free(IAClass
.ExecutionContext
);
3609 // Explicitly release all Scop objects and the underlying isl objects before
3610 // we release the isl context.
3612 ScopArrayInfoSet
.clear();
3613 ScopArrayInfoMap
.clear();
3614 ScopArrayNameMap
.clear();
3615 AccessFunctions
.clear();
3618 void Scop::foldSizeConstantsToRight() {
3619 isl_union_set
*Accessed
= isl_union_map_range(getAccesses().release());
3621 for (auto Array
: arrays()) {
3622 if (Array
->getNumberOfDimensions() <= 1)
3625 isl_space
*Space
= Array
->getSpace().release();
3627 Space
= isl_space_align_params(Space
, isl_union_set_get_space(Accessed
));
3629 if (!isl_union_set_contains(Accessed
, Space
)) {
3630 isl_space_free(Space
);
3634 isl_set
*Elements
= isl_union_set_extract_set(Accessed
, Space
);
3636 isl_map
*Transform
=
3637 isl_map_universe(isl_space_map_from_set(Array
->getSpace().release()));
3639 std::vector
<int> Int
;
3641 int Dims
= isl_set_dim(Elements
, isl_dim_set
);
3642 for (int i
= 0; i
< Dims
; i
++) {
3644 isl_set_project_out(isl_set_copy(Elements
), isl_dim_set
, 0, i
);
3645 DimOnly
= isl_set_project_out(DimOnly
, isl_dim_set
, 1, Dims
- i
- 1);
3646 DimOnly
= isl_set_lower_bound_si(DimOnly
, isl_dim_set
, 0, 0);
3648 isl_basic_set
*DimHull
= isl_set_affine_hull(DimOnly
);
3650 if (i
== Dims
- 1) {
3652 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3653 isl_basic_set_free(DimHull
);
3657 if (isl_basic_set_dim(DimHull
, isl_dim_div
) == 1) {
3658 isl_aff
*Diff
= isl_basic_set_get_div(DimHull
, 0);
3659 isl_val
*Val
= isl_aff_get_denominator_val(Diff
);
3664 if (isl_val_is_int(Val
))
3665 ValInt
= isl_val_get_num_si(Val
);
3668 Int
.push_back(ValInt
);
3670 isl_constraint
*C
= isl_constraint_alloc_equality(
3671 isl_local_space_from_space(isl_map_get_space(Transform
)));
3672 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, ValInt
);
3673 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, -1);
3674 Transform
= isl_map_add_constraint(Transform
, C
);
3675 isl_basic_set_free(DimHull
);
3679 isl_basic_set
*ZeroSet
= isl_basic_set_copy(DimHull
);
3680 ZeroSet
= isl_basic_set_fix_si(ZeroSet
, isl_dim_set
, 0, 0);
3683 if (isl_basic_set_is_equal(ZeroSet
, DimHull
)) {
3687 Int
.push_back(ValInt
);
3688 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3689 isl_basic_set_free(DimHull
);
3690 isl_basic_set_free(ZeroSet
);
3693 isl_set
*MappedElements
= isl_map_domain(isl_map_copy(Transform
));
3695 if (!isl_set_is_subset(Elements
, MappedElements
)) {
3696 isl_set_free(Elements
);
3697 isl_set_free(MappedElements
);
3698 isl_map_free(Transform
);
3702 isl_set_free(MappedElements
);
3704 bool CanFold
= true;
3709 unsigned NumDims
= Array
->getNumberOfDimensions();
3710 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3711 if (Int
[0] != Int
[i
] && Int
[i
])
3715 isl_set_free(Elements
);
3716 isl_map_free(Transform
);
3720 for (auto &Access
: AccessFunctions
)
3721 if (Access
->getScopArrayInfo() == Array
)
3722 Access
->setAccessRelation(Access
->getAccessRelation().apply_range(
3723 isl::manage(isl_map_copy(Transform
))));
3725 isl_map_free(Transform
);
3727 std::vector
<const SCEV
*> Sizes
;
3728 for (unsigned i
= 0; i
< NumDims
; i
++) {
3729 auto Size
= Array
->getDimensionSize(i
);
3731 if (i
== NumDims
- 1)
3732 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3733 Sizes
.push_back(Size
);
3736 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3738 isl_set_free(Elements
);
3740 isl_union_set_free(Accessed
);
3743 void Scop::markFortranArrays() {
3744 for (ScopStmt
&Stmt
: Stmts
) {
3745 for (MemoryAccess
*MemAcc
: Stmt
) {
3746 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3750 // TODO: const_cast-ing to edit
3751 ScopArrayInfo
*SAI
=
3752 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3753 assert(SAI
&& "memory access into a Fortran array does not "
3754 "have an associated ScopArrayInfo");
3755 SAI
->applyAndSetFAD(FAD
);
3760 void Scop::finalizeAccesses() {
3761 updateAccessDimensionality();
3762 foldSizeConstantsToRight();
3763 foldAccessRelations();
3764 assumeNoOutOfBounds();
3765 markFortranArrays();
3768 void Scop::updateAccessDimensionality() {
3769 // Check all array accesses for each base pointer and find a (virtual) element
3770 // size for the base pointer that divides all access functions.
3771 for (ScopStmt
&Stmt
: *this)
3772 for (MemoryAccess
*Access
: Stmt
) {
3773 if (!Access
->isArrayKind())
3775 ScopArrayInfo
*Array
=
3776 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3778 if (Array
->getNumberOfDimensions() != 1)
3780 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3781 const SCEV
*Subscript
= Access
->getSubscript(0);
3782 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3784 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3785 Array
->updateElementType(Ty
);
3788 for (auto &Stmt
: *this)
3789 for (auto &Access
: Stmt
)
3790 Access
->updateDimensionality();
3793 void Scop::foldAccessRelations() {
3794 for (auto &Stmt
: *this)
3795 for (auto &Access
: Stmt
)
3796 Access
->foldAccessRelation();
3799 void Scop::assumeNoOutOfBounds() {
3800 for (auto &Stmt
: *this)
3801 for (auto &Access
: Stmt
)
3802 Access
->assumeNoOutOfBound();
3805 void Scop::removeFromStmtMap(ScopStmt
&Stmt
) {
3806 if (Stmt
.isRegionStmt())
3807 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks()) {
3809 for (Instruction
&Inst
: *BB
)
3810 InstStmtMap
.erase(&Inst
);
3813 StmtMap
.erase(Stmt
.getBasicBlock());
3814 for (Instruction
*Inst
: Stmt
.getInstructions())
3815 InstStmtMap
.erase(Inst
);
3819 void Scop::removeStmts(std::function
<bool(ScopStmt
&)> ShouldDelete
) {
3820 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3821 if (!ShouldDelete(*StmtIt
)) {
3826 removeFromStmtMap(*StmtIt
);
3827 StmtIt
= Stmts
.erase(StmtIt
);
3831 void Scop::removeStmtNotInDomainMap() {
3832 auto ShouldDelete
= [this](ScopStmt
&Stmt
) -> bool {
3833 return !this->DomainMap
.lookup(Stmt
.getEntryBlock());
3835 removeStmts(ShouldDelete
);
3838 void Scop::simplifySCoP(bool AfterHoisting
) {
3839 auto ShouldDelete
= [AfterHoisting
](ScopStmt
&Stmt
) -> bool {
3840 bool RemoveStmt
= Stmt
.isEmpty();
3842 // Remove read only statements only after invariant load hoisting.
3843 if (!RemoveStmt
&& AfterHoisting
) {
3844 bool OnlyRead
= true;
3845 for (MemoryAccess
*MA
: Stmt
) {
3853 RemoveStmt
= OnlyRead
;
3858 removeStmts(ShouldDelete
);
3861 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3862 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
3866 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3867 LInst
= cast
<LoadInst
>(Rep
);
3869 Type
*Ty
= LInst
->getType();
3870 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3871 for (auto &IAClass
: InvariantEquivClasses
) {
3872 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3875 auto &MAs
= IAClass
.InvariantAccesses
;
3876 for (auto *MA
: MAs
)
3877 if (MA
->getAccessInstruction() == Val
)
3884 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3885 for (const llvm::Argument
&Arg
: F
.args())
3886 if (&Arg
== maybeParam
)
3892 bool Scop::canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3893 bool MAInvalidCtxIsEmpty
,
3894 bool NonHoistableCtxIsEmpty
) {
3895 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3896 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3897 if (PollyAllowDereferenceOfAllFunctionParams
&&
3898 isAParameter(LInst
->getPointerOperand(), getFunction()))
3901 // TODO: We can provide more information for better but more expensive
3903 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3904 LInst
->getAlignment(), DL
))
3907 // If the location might be overwritten we do not hoist it unconditionally.
3909 // TODO: This is probably too conservative.
3910 if (!NonHoistableCtxIsEmpty
)
3913 // If a dereferenceable load is in a statement that is modeled precisely we
3915 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3918 // Even if the statement is not modeled precisely we can hoist the load if it
3919 // does not involve any parameters that might have been specialized by the
3920 // statement domain.
3921 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3922 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
3927 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3931 isl::set StmtInvalidCtx
= Stmt
.getInvalidContext();
3932 bool StmtInvalidCtxIsEmpty
= StmtInvalidCtx
.is_empty();
3934 // Get the context under which the statement is executed but remove the error
3935 // context under which this statement is reached.
3936 isl::set DomainCtx
= Stmt
.getDomain().params();
3937 DomainCtx
= DomainCtx
.subtract(StmtInvalidCtx
);
3939 if (isl_set_n_basic_set(DomainCtx
.get()) >= MaxDisjunctsInDomain
) {
3940 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3941 invalidate(COMPLEXITY
, AccInst
->getDebugLoc(), AccInst
->getParent());
3945 // Project out all parameters that relate to loads in the statement. Otherwise
3946 // we could have cyclic dependences on the constraints under which the
3947 // hoisted loads are executed and we could not determine an order in which to
3948 // pre-load them. This happens because not only lower bounds are part of the
3949 // domain but also upper bounds.
3950 for (auto &InvMA
: InvMAs
) {
3951 auto *MA
= InvMA
.MA
;
3952 Instruction
*AccInst
= MA
->getAccessInstruction();
3953 if (SE
->isSCEVable(AccInst
->getType())) {
3954 SetVector
<Value
*> Values
;
3955 for (const SCEV
*Parameter
: Parameters
) {
3957 findValues(Parameter
, *SE
, Values
);
3958 if (!Values
.count(AccInst
))
3961 if (isl::id ParamId
= getIdForParam(Parameter
)) {
3962 int Dim
= DomainCtx
.find_dim_by_id(isl::dim::param
, ParamId
);
3964 DomainCtx
= DomainCtx
.eliminate(isl::dim::param
, Dim
, 1);
3970 for (auto &InvMA
: InvMAs
) {
3971 auto *MA
= InvMA
.MA
;
3972 isl::set NHCtx
= InvMA
.NonHoistableCtx
;
3974 // Check for another invariant access that accesses the same location as
3975 // MA and if found consolidate them. Otherwise create a new equivalence
3976 // class at the end of InvariantEquivClasses.
3977 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3978 Type
*Ty
= LInst
->getType();
3979 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3981 isl::set MAInvalidCtx
= MA
->getInvalidContext();
3982 bool NonHoistableCtxIsEmpty
= NHCtx
.is_empty();
3983 bool MAInvalidCtxIsEmpty
= MAInvalidCtx
.is_empty();
3986 // Check if we know that this pointer can be speculatively accessed.
3987 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3988 NonHoistableCtxIsEmpty
)) {
3989 MACtx
= isl::set::universe(DomainCtx
.get_space());
3992 MACtx
= MACtx
.subtract(MAInvalidCtx
.unite(NHCtx
));
3993 MACtx
= MACtx
.gist_params(getContext());
3996 bool Consolidated
= false;
3997 for (auto &IAClass
: InvariantEquivClasses
) {
3998 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
4001 // If the pointer and the type is equal check if the access function wrt.
4002 // to the domain is equal too. It can happen that the domain fixes
4003 // parameter values and these can be different for distinct part of the
4004 // SCoP. If this happens we cannot consolidate the loads but need to
4005 // create a new invariant load equivalence class.
4006 auto &MAs
= IAClass
.InvariantAccesses
;
4008 auto *LastMA
= MAs
.front();
4010 isl::set AR
= MA
->getAccessRelation().range();
4011 isl::set LastAR
= LastMA
->getAccessRelation().range();
4012 bool SameAR
= AR
.is_equal(LastAR
);
4018 // Add MA to the list of accesses that are in this class.
4021 Consolidated
= true;
4023 // Unify the execution context of the class and this statement.
4024 isl::set IAClassDomainCtx
= isl::manage(IAClass
.ExecutionContext
);
4025 if (IAClassDomainCtx
)
4026 IAClassDomainCtx
= IAClassDomainCtx
.unite(MACtx
).coalesce();
4028 IAClassDomainCtx
= MACtx
;
4029 IAClass
.ExecutionContext
= IAClassDomainCtx
.release();
4036 // If we did not consolidate MA, thus did not find an equivalence class
4037 // for it, we create a new one.
4038 InvariantEquivClasses
.emplace_back(InvariantEquivClassTy
{
4039 PointerSCEV
, MemoryAccessList
{MA
}, MACtx
.release(), Ty
});
4043 /// Check if an access range is too complex.
4045 /// An access range is too complex, if it contains either many disjuncts or
4046 /// very complex expressions. As a simple heuristic, we assume if a set to
4047 /// be too complex if the sum of existentially quantified dimensions and
4048 /// set dimensions is larger than a threshold. This reliably detects both
4049 /// sets with many disjuncts as well as sets with many divisions as they
4052 /// @param AccessRange The range to check for complexity.
4054 /// @returns True if the access range is too complex.
4055 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
4056 unsigned NumTotalDims
= 0;
4058 auto CountDimensions
= [&NumTotalDims
](isl::basic_set BSet
) -> isl::stat
{
4059 NumTotalDims
+= BSet
.dim(isl::dim::div
);
4060 NumTotalDims
+= BSet
.dim(isl::dim::set
);
4061 return isl::stat::ok
;
4064 AccessRange
.foreach_basic_set(CountDimensions
);
4066 if (NumTotalDims
> MaxDimensionsInAccessRange
)
4072 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
4073 // TODO: Loads that are not loop carried, hence are in a statement with
4074 // zero iterators, are by construction invariant, though we
4075 // currently "hoist" them anyway. This is necessary because we allow
4076 // them to be treated as parameters (e.g., in conditions) and our code
4077 // generation would otherwise use the old value.
4079 auto &Stmt
= *Access
->getStatement();
4080 BasicBlock
*BB
= Stmt
.getEntryBlock();
4082 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
4083 Access
->isMemoryIntrinsic())
4086 // Skip accesses that have an invariant base pointer which is defined but
4087 // not loaded inside the SCoP. This can happened e.g., if a readnone call
4088 // returns a pointer that is used as a base address. However, as we want
4089 // to hoist indirect pointers, we allow the base pointer to be defined in
4090 // the region if it is also a memory access. Each ScopArrayInfo object
4091 // that has a base pointer origin has a base pointer that is loaded and
4092 // that it is invariant, thus it will be hoisted too. However, if there is
4093 // no base pointer origin we check that the base pointer is defined
4094 // outside the region.
4095 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
4096 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
4099 isl::map AccessRelation
= give(Access
->getAccessRelation().release());
4100 assert(!AccessRelation
.is_empty());
4102 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
4105 AccessRelation
= AccessRelation
.intersect_domain(Stmt
.getDomain());
4106 isl::set SafeToLoad
;
4108 auto &DL
= getFunction().getParent()->getDataLayout();
4109 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
4111 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
4112 } else if (BB
!= LI
->getParent()) {
4113 // Skip accesses in non-affine subregions as they might not be executed
4114 // under the same condition as the entry of the non-affine subregion.
4117 SafeToLoad
= AccessRelation
.range();
4120 if (isAccessRangeTooComplex(AccessRelation
.range()))
4123 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
4124 isl::set WrittenCtx
= Written
.params();
4125 bool IsWritten
= !WrittenCtx
.is_empty();
4130 WrittenCtx
= WrittenCtx
.remove_divs();
4132 isl_set_n_basic_set(WrittenCtx
.get()) >= MaxDisjunctsInDomain
;
4133 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
4136 addAssumption(INVARIANTLOAD
, WrittenCtx
.copy(), LI
->getDebugLoc(),
4137 AS_RESTRICTION
, LI
->getParent());
4141 void Scop::verifyInvariantLoads() {
4142 auto &RIL
= getRequiredInvariantLoads();
4143 for (LoadInst
*LI
: RIL
) {
4144 assert(LI
&& contains(LI
));
4145 // If there exists a statement in the scop which has a memory access for
4146 // @p LI, then mark this scop as infeasible for optimization.
4147 for (ScopStmt
&Stmt
: Stmts
)
4148 if (Stmt
.getArrayAccessOrNULLFor(LI
)) {
4149 invalidate(INVARIANTLOAD
, LI
->getDebugLoc(), LI
->getParent());
4155 void Scop::hoistInvariantLoads() {
4156 if (!PollyInvariantLoadHoisting
)
4159 isl::union_map Writes
= getWrites();
4160 for (ScopStmt
&Stmt
: *this) {
4161 InvariantAccessesTy InvariantAccesses
;
4163 for (MemoryAccess
*Access
: Stmt
)
4164 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
4165 InvariantAccesses
.push_back({Access
, NHCtx
});
4167 // Transfer the memory access from the statement to the SCoP.
4168 for (auto InvMA
: InvariantAccesses
)
4169 Stmt
.removeMemoryAccess(InvMA
.MA
);
4170 addInvariantLoads(Stmt
, InvariantAccesses
);
4174 /// Find the canonical scop array info object for a set of invariant load
4175 /// hoisted loads. The canonical array is the one that corresponds to the
4176 /// first load in the list of accesses which is used as base pointer of a
4178 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
4179 MemoryAccessList
&Accesses
) {
4180 for (MemoryAccess
*Access
: Accesses
) {
4181 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
4182 Access
->getAccessInstruction(), MemoryKind::Array
);
4184 return CanonicalArray
;
4189 /// Check if @p Array severs as base array in an invariant load.
4190 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
4191 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
4192 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
4193 if (Access2
->getScopArrayInfo() == Array
)
4198 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4199 /// with a reference to @p New.
4200 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
4201 const ScopArrayInfo
*New
) {
4202 for (ScopStmt
&Stmt
: *S
)
4203 for (MemoryAccess
*Access
: Stmt
) {
4204 if (Access
->getLatestScopArrayInfo() != Old
)
4207 isl::id Id
= New
->getBasePtrId();
4208 isl::map Map
= Access
->getAccessRelation();
4209 Map
= Map
.set_tuple_id(isl::dim::out
, Id
);
4210 Access
->setAccessRelation(Map
);
4214 void Scop::canonicalizeDynamicBasePtrs() {
4215 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
4216 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
4218 const ScopArrayInfo
*CanonicalBasePtrSAI
=
4219 findCanonicalArray(this, BasePtrAccesses
);
4221 if (!CanonicalBasePtrSAI
)
4224 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
4225 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
4226 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
4227 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
4228 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
4231 // we currently do not canonicalize arrays where some accesses are
4232 // hoisted as invariant loads. If we would, we need to update the access
4233 // function of the invariant loads as well. However, as this is not a
4234 // very common situation, we leave this for now to avoid further
4235 // complexity increases.
4236 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
4239 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
4244 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
4245 ArrayRef
<const SCEV
*> Sizes
,
4247 const char *BaseName
) {
4248 assert((BasePtr
|| BaseName
) &&
4249 "BasePtr and BaseName can not be nullptr at the same time.");
4250 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
4251 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
4252 : ScopArrayNameMap
[BaseName
];
4254 auto &DL
= getFunction().getParent()->getDataLayout();
4255 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
4256 DL
, this, BaseName
));
4257 ScopArrayInfoSet
.insert(SAI
.get());
4259 SAI
->updateElementType(ElementType
);
4260 // In case of mismatching array sizes, we bail out by setting the run-time
4261 // context to false.
4262 if (!SAI
->updateSizes(Sizes
))
4263 invalidate(DELINEARIZATION
, DebugLoc());
4268 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
4269 const std::string
&BaseName
,
4270 const std::vector
<unsigned> &Sizes
) {
4271 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
4272 std::vector
<const SCEV
*> SCEVSizes
;
4274 for (auto size
: Sizes
)
4276 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
4278 SCEVSizes
.push_back(nullptr);
4280 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
4281 MemoryKind::Array
, BaseName
.c_str());
4285 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
4287 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
4291 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
4292 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
4293 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
4297 std::string
Scop::getContextStr() const { return getContext().to_str(); }
4299 std::string
Scop::getAssumedContextStr() const {
4300 assert(AssumedContext
&& "Assumed context not yet built");
4301 return stringFromIslObj(AssumedContext
);
4304 std::string
Scop::getInvalidContextStr() const {
4305 return stringFromIslObj(InvalidContext
);
4308 std::string
Scop::getNameStr() const {
4309 std::string ExitName
, EntryName
;
4310 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4311 return EntryName
+ "---" + ExitName
;
4314 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4315 std::string ExitName
, EntryName
;
4316 raw_string_ostream
ExitStr(ExitName
);
4317 raw_string_ostream
EntryStr(EntryName
);
4319 R
.getEntry()->printAsOperand(EntryStr
, false);
4323 R
.getExit()->printAsOperand(ExitStr
, false);
4326 ExitName
= "FunctionExit";
4328 return std::make_pair(EntryName
, ExitName
);
4331 isl::set
Scop::getContext() const { return isl::manage(isl_set_copy(Context
)); }
4332 isl::space
Scop::getParamSpace() const { return getContext().get_space(); }
4334 isl::space
Scop::getFullParamSpace() const {
4335 std::vector
<isl::id
> FortranIDs
;
4336 FortranIDs
= getFortranArrayIds(arrays());
4338 isl::space Space
= isl::space::params_alloc(
4339 getIslCtx(), ParameterIds
.size() + FortranIDs
.size());
4342 for (const SCEV
*Parameter
: Parameters
) {
4343 isl::id Id
= getIdForParam(Parameter
);
4344 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4347 for (isl::id Id
: FortranIDs
)
4348 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4353 isl::set
Scop::getAssumedContext() const {
4354 assert(AssumedContext
&& "Assumed context not yet built");
4355 return isl::manage(isl_set_copy(AssumedContext
));
4358 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4359 if (PollyProcessUnprofitable
)
4365 unsigned OptimizableStmtsOrLoops
= 0;
4366 for (auto &Stmt
: *this) {
4367 if (Stmt
.getNumIterators() == 0)
4370 bool ContainsArrayAccs
= false;
4371 bool ContainsScalarAccs
= false;
4372 for (auto *MA
: Stmt
) {
4375 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4376 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4379 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4380 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4383 return OptimizableStmtsOrLoops
> 1;
4386 bool Scop::hasFeasibleRuntimeContext() const {
4387 auto *PositiveContext
= getAssumedContext().release();
4388 auto *NegativeContext
= getInvalidContext().release();
4390 addNonEmptyDomainConstraints(isl::manage(PositiveContext
)).release();
4391 bool IsFeasible
= !(isl_set_is_empty(PositiveContext
) ||
4392 isl_set_is_subset(PositiveContext
, NegativeContext
));
4393 isl_set_free(PositiveContext
);
4395 isl_set_free(NegativeContext
);
4399 auto *DomainContext
= isl_union_set_params(getDomains().release());
4400 IsFeasible
= !isl_set_is_subset(DomainContext
, NegativeContext
);
4401 IsFeasible
&= !isl_set_is_subset(Context
, NegativeContext
);
4402 isl_set_free(NegativeContext
);
4403 isl_set_free(DomainContext
);
4408 static std::string
toString(AssumptionKind Kind
) {
4411 return "No-aliasing";
4415 return "No-overflows";
4417 return "Signed-unsigned";
4419 return "Low complexity";
4421 return "Profitable";
4425 return "Finite loop";
4427 return "Invariant load";
4428 case DELINEARIZATION
:
4429 return "Delinearization";
4431 llvm_unreachable("Unknown AssumptionKind!");
4434 bool Scop::isEffectiveAssumption(__isl_keep isl_set
*Set
, AssumptionSign Sign
) {
4435 if (Sign
== AS_ASSUMPTION
) {
4436 if (isl_set_is_subset(Context
, Set
))
4439 if (isl_set_is_subset(AssumedContext
, Set
))
4442 if (isl_set_is_disjoint(Set
, Context
))
4445 if (isl_set_is_subset(Set
, InvalidContext
))
4451 bool Scop::trackAssumption(AssumptionKind Kind
, __isl_keep isl_set
*Set
,
4452 DebugLoc Loc
, AssumptionSign Sign
, BasicBlock
*BB
) {
4453 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4456 // Do never emit trivial assumptions as they only clutter the output.
4457 if (!PollyRemarksMinimal
) {
4458 isl_set
*Univ
= nullptr;
4459 if (Sign
== AS_ASSUMPTION
)
4460 Univ
= isl_set_universe(isl_set_get_space(Set
));
4462 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& isl_set_is_empty(Set
)) ||
4463 (Sign
== AS_ASSUMPTION
&& isl_set_is_equal(Univ
, Set
));
4472 AssumptionsAliasing
++;
4475 AssumptionsInbounds
++;
4478 AssumptionsWrapping
++;
4481 AssumptionsUnsigned
++;
4484 AssumptionsComplexity
++;
4487 AssumptionsUnprofitable
++;
4490 AssumptionsErrorBlock
++;
4493 AssumptionsInfiniteLoop
++;
4496 AssumptionsInvariantLoad
++;
4498 case DELINEARIZATION
:
4499 AssumptionsDelinearization
++;
4503 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4504 std::string Msg
= toString(Kind
) + Suffix
+ stringFromIslObj(Set
);
4506 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
, BB
)
4509 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
,
4515 void Scop::addAssumption(AssumptionKind Kind
, __isl_take isl_set
*Set
,
4516 DebugLoc Loc
, AssumptionSign Sign
, BasicBlock
*BB
) {
4517 // Simplify the assumptions/restrictions first.
4518 Set
= isl_set_gist_params(Set
, getContext().release());
4520 if (!trackAssumption(Kind
, Set
, Loc
, Sign
, BB
)) {
4525 if (Sign
== AS_ASSUMPTION
) {
4526 AssumedContext
= isl_set_intersect(AssumedContext
, Set
);
4527 AssumedContext
= isl_set_coalesce(AssumedContext
);
4529 InvalidContext
= isl_set_union(InvalidContext
, Set
);
4530 InvalidContext
= isl_set_coalesce(InvalidContext
);
4534 void Scop::recordAssumption(AssumptionKind Kind
, __isl_take isl_set
*Set
,
4535 DebugLoc Loc
, AssumptionSign Sign
, BasicBlock
*BB
) {
4536 assert((isl_set_is_params(Set
) || BB
) &&
4537 "Assumptions without a basic block must be parameter sets");
4538 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4541 void Scop::addRecordedAssumptions() {
4542 while (!RecordedAssumptions
.empty()) {
4543 const Assumption
&AS
= RecordedAssumptions
.pop_back_val();
4546 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
, nullptr /* BasicBlock */);
4550 // If the domain was deleted the assumptions are void.
4551 isl_set
*Dom
= getDomainConditions(AS
.BB
).release();
4553 isl_set_free(AS
.Set
);
4557 // If a basic block was given use its domain to simplify the assumption.
4558 // In case of restrictions we know they only have to hold on the domain,
4559 // thus we can intersect them with the domain of the block. However, for
4560 // assumptions the domain has to imply them, thus:
4562 // Dom => S <==> A v B <==> A - B
4564 // To avoid the complement we will register A - B as a restriction not an
4566 isl_set
*S
= AS
.Set
;
4567 if (AS
.Sign
== AS_RESTRICTION
)
4568 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4569 else /* (AS.Sign == AS_ASSUMPTION) */
4570 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4572 addAssumption(AS
.Kind
, S
, AS
.Loc
, AS_RESTRICTION
, AS
.BB
);
4576 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
, BasicBlock
*BB
) {
4577 DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind
<< "\n");
4578 addAssumption(Kind
, isl_set_empty(getParamSpace().release()), Loc
,
4582 isl::set
Scop::getInvalidContext() const {
4583 return isl::manage(isl_set_copy(InvalidContext
));
4586 void Scop::printContext(raw_ostream
&OS
) const {
4588 OS
.indent(4) << Context
<< "\n";
4590 OS
.indent(4) << "Assumed Context:\n";
4591 OS
.indent(4) << AssumedContext
<< "\n";
4593 OS
.indent(4) << "Invalid Context:\n";
4594 OS
.indent(4) << InvalidContext
<< "\n";
4597 for (const SCEV
*Parameter
: Parameters
)
4598 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4601 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4603 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4604 if (Pair
.second
.size() == 0)
4607 noOfGroups
+= Pair
.second
.size();
4610 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4611 if (MinMaxAliasGroups
.empty()) {
4612 OS
.indent(8) << "n/a\n";
4616 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4618 // If the group has no read only accesses print the write accesses.
4619 if (Pair
.second
.empty()) {
4620 OS
.indent(8) << "[[";
4621 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4622 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4628 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4629 OS
.indent(8) << "[[";
4630 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4631 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4632 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4640 void Scop::printStatements(raw_ostream
&OS
, bool PrintInstructions
) const {
4641 OS
<< "Statements {\n";
4643 for (const ScopStmt
&Stmt
: *this) {
4645 Stmt
.print(OS
, PrintInstructions
);
4648 OS
.indent(4) << "}\n";
4651 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4654 for (auto &Array
: arrays())
4657 OS
.indent(4) << "}\n";
4659 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4661 for (auto &Array
: arrays())
4662 Array
->print(OS
, /* SizeAsPwAff */ true);
4664 OS
.indent(4) << "}\n";
4667 void Scop::print(raw_ostream
&OS
, bool PrintInstructions
) const {
4668 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4669 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4670 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4671 OS
.indent(4) << "Invariant Accesses: {\n";
4672 for (const auto &IAClass
: InvariantEquivClasses
) {
4673 const auto &MAs
= IAClass
.InvariantAccesses
;
4675 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4677 MAs
.front()->print(OS
);
4678 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4682 OS
.indent(4) << "}\n";
4683 printContext(OS
.indent(4));
4684 printArrayInfo(OS
.indent(4));
4685 printAliasAssumptions(OS
);
4686 printStatements(OS
.indent(4), PrintInstructions
);
4689 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4690 LLVM_DUMP_METHOD
void Scop::dump() const { print(dbgs(), true); }
// Return the raw isl context shared by all isl objects of this SCoP.
isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
4695 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4697 // First try to use the SCEVAffinator to generate a piecewise defined
4698 // affine function from @p E in the context of @p BB. If that tasks becomes to
4699 // complex the affinator might return a nullptr. In such a case we invalidate
4700 // the SCoP and return a dummy value. This way we do not need to add error
4701 // handling code to all users of this function.
4702 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4704 // TODO: We could use a heuristic and either use:
4705 // SCEVAffinator::takeNonNegativeAssumption
4707 // SCEVAffinator::interpretAsUnsigned
4708 // to deal with unsigned or "NonNegative" SCEVs.
4710 Affinator
.takeNonNegativeAssumption(PWAC
);
4714 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4715 invalidate(COMPLEXITY
, DL
, BB
);
4716 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4719 isl::union_set
Scop::getDomains() const {
4720 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx(), 0);
4721 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4723 for (const ScopStmt
&Stmt
: *this)
4724 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4726 return isl::manage(Domain
);
4729 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4730 PWACtx PWAC
= getPwAff(E
, BB
);
4731 isl_set_free(PWAC
.second
);
4732 return isl::manage(PWAC
.first
);
4736 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4737 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4739 for (ScopStmt
&Stmt
: *this) {
4740 for (MemoryAccess
*MA
: Stmt
) {
4741 if (!Predicate(*MA
))
4744 isl::set Domain
= Stmt
.getDomain();
4745 isl::map AccessDomain
= MA
->getAccessRelation();
4746 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4747 Accesses
= Accesses
.add_map(AccessDomain
);
4751 return Accesses
.coalesce();
4754 isl::union_map
Scop::getMustWrites() {
4755 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4758 isl::union_map
Scop::getMayWrites() {
4759 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4762 isl::union_map
Scop::getWrites() {
4763 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4766 isl::union_map
Scop::getReads() {
4767 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4770 isl::union_map
Scop::getAccesses() {
4771 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4774 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4775 return getAccessesOfType(
4776 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4779 // Check whether @p Node is an extension node.
4781 // @return true if @p Node is an extension node.
4782 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4783 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4784 return isl_bool_error
;
4786 return isl_bool_true
;
4789 bool Scop::containsExtensionNode(__isl_keep isl_schedule
*Schedule
) {
4790 return isl_schedule_foreach_schedule_node_top_down(Schedule
, isNotExtNode
,
4791 nullptr) == isl_stat_error
;
4794 isl::union_map
Scop::getSchedule() const {
4795 auto *Tree
= getScheduleTree().release();
4796 if (containsExtensionNode(Tree
)) {
4797 isl_schedule_free(Tree
);
4800 auto *S
= isl_schedule_get_map(Tree
);
4801 isl_schedule_free(Tree
);
4802 return isl::manage(S
);
4805 isl::schedule
Scop::getScheduleTree() const {
4806 return isl::manage(isl_schedule_intersect_domain(isl_schedule_copy(Schedule
),
4807 getDomains().release()));
4810 void Scop::setSchedule(__isl_take isl_union_map
*NewSchedule
) {
4811 auto *S
= isl_schedule_from_domain(getDomains().release());
4812 S
= isl_schedule_insert_partial_schedule(
4813 S
, isl_multi_union_pw_aff_from_union_map(NewSchedule
));
4814 isl_schedule_free(Schedule
);
4818 void Scop::setScheduleTree(__isl_take isl_schedule
*NewSchedule
) {
4819 isl_schedule_free(Schedule
);
4820 Schedule
= NewSchedule
;
4823 bool Scop::restrictDomains(isl::union_set Domain
) {
4824 bool Changed
= false;
4825 for (ScopStmt
&Stmt
: *this) {
4826 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4827 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4829 if (StmtDomain
.is_subset(NewStmtDomain
))
4834 NewStmtDomain
= NewStmtDomain
.coalesce();
4836 if (NewStmtDomain
.is_empty())
4837 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4839 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
// Return the ScalarEvolution analysis this SCoP was constructed with.
ScalarEvolution *Scop::getSE() const { return SE; }
4846 // Create an isl_multi_union_aff that defines an identity mapping from the
4847 // elements of USet to their N-th dimension.
4851 // Domain: { A[i,j]; B[i,j,k] }
4854 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4856 // @param USet A union set describing the elements for which to generate a
4858 // @param N The dimension to map to.
4859 // @returns A mapping from USet to its N-th dimension.
4860 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4863 assert(!USet
.is_empty());
4865 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4867 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4868 int Dim
= S
.dim(isl::dim::set
);
4869 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4872 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4874 Result
= Result
.add_pw_multi_aff(PMA
);
4875 return isl::stat::ok
;
4878 isl::stat Res
= USet
.foreach_set(Lambda
);
4881 assert(Res
== isl::stat::ok
);
4883 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4886 void Scop::addScopStmt(BasicBlock
*BB
, Loop
*SurroundingLoop
,
4887 std::vector
<Instruction
*> Instructions
, int Count
) {
4888 assert(BB
&& "Unexpected nullptr!");
4889 Stmts
.emplace_back(*this, *BB
, SurroundingLoop
, Instructions
, Count
);
4890 auto *Stmt
= &Stmts
.back();
4891 StmtMap
[BB
].push_back(Stmt
);
4892 for (Instruction
*Inst
: Instructions
) {
4893 assert(!InstStmtMap
.count(Inst
) &&
4894 "Unexpected statement corresponding to the instruction.");
4895 InstStmtMap
[Inst
] = Stmt
;
4899 void Scop::addScopStmt(Region
*R
, Loop
*SurroundingLoop
) {
4900 assert(R
&& "Unexpected nullptr!");
4901 Stmts
.emplace_back(*this, *R
, SurroundingLoop
);
4902 auto *Stmt
= &Stmts
.back();
4903 for (BasicBlock
*BB
: R
->blocks()) {
4904 StmtMap
[BB
].push_back(Stmt
);
4905 for (Instruction
&Inst
: *BB
) {
4906 assert(!InstStmtMap
.count(&Inst
) &&
4907 "Unexpected statement corresponding to the instruction.");
4908 InstStmtMap
[&Inst
] = Stmt
;
4913 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4916 isl::set SourceDomain
= SourceRel
.domain();
4917 isl::set TargetDomain
= TargetRel
.domain();
4918 assert(Domain
.is_subset(TargetDomain
) &&
4919 "Target access not defined for complete statement domain");
4920 assert(Domain
.is_subset(SourceDomain
) &&
4921 "Source access not defined for complete statement domain");
4923 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4925 return &(Stmts
.back());
4928 void Scop::buildSchedule(LoopInfo
&LI
) {
4929 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4930 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4931 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4932 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4933 Schedule
= LoopStack
[0].Schedule
;
4936 /// To generate a schedule for the elements in a Region we traverse the Region
4937 /// in reverse-post-order and add the contained RegionNodes in traversal order
4938 /// to the schedule of the loop that is currently at the top of the LoopStack.
4939 /// For loop-free codes, this results in a correct sequential ordering.
4950 /// Including loops requires additional processing. Whenever a loop header is
4951 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4952 /// from an empty schedule, we first process all RegionNodes that are within
4953 /// this loop and complete the sequential schedule at this loop-level before
4954 /// processing about any other nodes. To implement this
4955 /// loop-nodes-first-processing, the reverse post-order traversal is
4956 /// insufficient. Hence, we additionally check if the traversal yields
4957 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4958 /// These region-nodes are then queue and only traverse after the all nodes
4959 /// within the current loop have been processed.
4960 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4961 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4963 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4964 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4965 std::deque
<RegionNode
*> DelayList
;
4966 bool LastRNWaiting
= false;
4968 // Iterate over the region @p R in reverse post-order but queue
4969 // sub-regions/blocks iff they are not part of the last encountered but not
4970 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4971 // that we queued the last sub-region/block from the reverse post-order
4972 // iterator. If it is set we have to explore the next sub-region/block from
4973 // the iterator (if any) to guarantee progress. If it is not set we first try
4974 // the next queued sub-region/blocks.
4975 while (!WorkList
.empty() || !DelayList
.empty()) {
4978 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4979 RN
= WorkList
.front();
4980 WorkList
.pop_front();
4981 LastRNWaiting
= false;
4983 RN
= DelayList
.front();
4984 DelayList
.pop_front();
4987 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4991 Loop
*LastLoop
= LoopStack
.back().L
;
4992 if (LastLoop
!= L
) {
4993 if (LastLoop
&& !LastLoop
->contains(L
)) {
4994 LastRNWaiting
= true;
4995 DelayList
.push_back(RN
);
4998 LoopStack
.push_back({L
, nullptr, 0});
5000 buildSchedule(RN
, LoopStack
, LI
);
5004 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
5005 if (RN
->isSubRegion()) {
5006 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
5007 if (!isNonAffineSubRegion(LocalRegion
)) {
5008 buildSchedule(LocalRegion
, LoopStack
, LI
);
5013 auto &LoopData
= LoopStack
.back();
5014 LoopData
.NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
5016 for (auto *Stmt
: getStmtListFor(RN
)) {
5017 auto *UDomain
= isl_union_set_from_set(Stmt
->getDomain().release());
5018 auto *StmtSchedule
= isl_schedule_from_domain(UDomain
);
5019 LoopData
.Schedule
= combineInSequence(LoopData
.Schedule
, StmtSchedule
);
5022 // Check if we just processed the last node in this loop. If we did, finalize
5025 // - adding new schedule dimensions
5026 // - folding the resulting schedule into the parent loop schedule
5027 // - dropping the loop schedule from the LoopStack.
5029 // Then continue to check surrounding loops, which might also have been
5030 // completed by this node.
5031 while (LoopData
.L
&&
5032 LoopData
.NumBlocksProcessed
== getNumBlocksInLoop(LoopData
.L
)) {
5033 auto *Schedule
= LoopData
.Schedule
;
5034 auto NumBlocksProcessed
= LoopData
.NumBlocksProcessed
;
5036 LoopStack
.pop_back();
5037 auto &NextLoopData
= LoopStack
.back();
5040 isl::union_set Domain
= give(isl_schedule_get_domain(Schedule
));
5041 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, LoopStack
.size());
5042 Schedule
= isl_schedule_insert_partial_schedule(Schedule
, MUPA
.release());
5043 NextLoopData
.Schedule
=
5044 combineInSequence(NextLoopData
.Schedule
, Schedule
);
5047 NextLoopData
.NumBlocksProcessed
+= NumBlocksProcessed
;
5048 LoopData
= NextLoopData
;
5052 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
5053 auto StmtMapIt
= StmtMap
.find(BB
);
5054 if (StmtMapIt
== StmtMap
.end())
5056 return StmtMapIt
->second
;
5059 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
5060 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
5061 if (!StmtList
.empty())
5062 return StmtList
.back();
5066 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
5067 if (RN
->isSubRegion())
5068 return getStmtListFor(RN
->getNodeAs
<Region
>());
5069 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
5072 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
5073 return getStmtListFor(R
->getEntry());
5076 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
5077 if (!L
|| !R
.contains(L
))
5079 // outermostLoopInRegion always returns nullptr for top level regions
5080 if (R
.isTopLevelRegion()) {
5081 // LoopInfo's depths start at 1, we start at 0
5082 return L
->getLoopDepth() - 1;
5084 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
5086 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
5090 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
5091 for (auto &SAI
: arrays()) {
5092 if (SAI
->getName() == BaseName
)
5098 void Scop::addAccessData(MemoryAccess
*Access
) {
5099 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
5100 assert(SAI
&& "can only use after access relations have been constructed");
5102 if (Access
->isOriginalValueKind() && Access
->isRead())
5103 ValueUseAccs
[SAI
].push_back(Access
);
5104 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
5105 PHIIncomingAccs
[SAI
].push_back(Access
);
5108 void Scop::removeAccessData(MemoryAccess
*Access
) {
5109 if (Access
->isOriginalValueKind() && Access
->isRead()) {
5110 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
5111 std::remove(Uses
.begin(), Uses
.end(), Access
);
5112 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
5113 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
5114 std::remove(Incomings
.begin(), Incomings
.end(), Access
);
5118 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
5119 assert(SAI
->isValueKind());
5121 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
5125 ScopStmt
*Stmt
= getStmtFor(Val
);
5129 return Stmt
->lookupValueWriteOf(Val
);
5132 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
5133 assert(SAI
->isValueKind());
5134 auto It
= ValueUseAccs
.find(SAI
);
5135 if (It
== ValueUseAccs
.end())
5140 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
5141 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
5143 if (SAI
->isExitPHIKind())
5146 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
5147 ScopStmt
*Stmt
= getStmtFor(PHI
);
5148 assert(Stmt
&& "PHINode must be within the SCoP");
5150 return Stmt
->lookupPHIReadOf(PHI
);
5153 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
5154 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
5155 auto It
= PHIIncomingAccs
.find(SAI
);
5156 if (It
== PHIIncomingAccs
.end())
5161 bool Scop::isEscaping(Instruction
*Inst
) {
5162 assert(contains(Inst
) && "The concept of escaping makes only sense for "
5163 "values defined inside the SCoP");
5165 for (Use
&Use
: Inst
->uses()) {
5166 BasicBlock
*UserBB
= getUseBlock(Use
);
5167 if (!contains(UserBB
))
5170 // When the SCoP region exit needs to be simplified, PHIs in the region exit
5171 // move to a new basic block such that its incoming blocks are not in the
5173 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
5174 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
5180 Scop::ScopStatistics
Scop::getStatistics() const {
5181 ScopStatistics Result
;
5182 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5183 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
5185 int NumTotalLoops
= LoopStat
.NumLoops
;
5186 Result
.NumBoxedLoops
= getBoxedLoops().size();
5187 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
5189 for (const ScopStmt
&Stmt
: *this) {
5190 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
5191 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
5192 for (MemoryAccess
*MA
: Stmt
) {
5196 if (MA
->isLatestValueKind()) {
5197 Result
.NumValueWrites
+= 1;
5199 Result
.NumValueWritesInLoops
+= 1;
5202 if (MA
->isLatestAnyPHIKind()) {
5203 Result
.NumPHIWrites
+= 1;
5205 Result
.NumPHIWritesInLoops
+= 1;
5209 MA
->getAccessRelation().intersect_domain(Domain
).range();
5210 if (AccSet
.is_singleton()) {
5211 Result
.NumSingletonWrites
+= 1;
5213 Result
.NumSingletonWritesInLoops
+= 1;
5221 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
5222 scop
.print(OS
, PollyPrintInstructions
);
5226 //===----------------------------------------------------------------------===//
5227 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5228 AU
.addRequired
<LoopInfoWrapperPass
>();
5229 AU
.addRequired
<RegionInfoPass
>();
5230 AU
.addRequired
<DominatorTreeWrapperPass
>();
5231 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5232 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5233 AU
.addRequired
<AAResultsWrapperPass
>();
5234 AU
.addRequired
<AssumptionCacheTracker
>();
5235 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5236 AU
.setPreservesAll();
5239 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
5240 Scop::ScopStatistics ScopStats
) {
5241 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
5244 NumLoopsInScop
+= Stats
.NumLoops
;
5246 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
5248 if (Stats
.MaxDepth
== 1)
5250 else if (Stats
.MaxDepth
== 2)
5252 else if (Stats
.MaxDepth
== 3)
5253 NumScopsDepthThree
++;
5254 else if (Stats
.MaxDepth
== 4)
5255 NumScopsDepthFour
++;
5256 else if (Stats
.MaxDepth
== 5)
5257 NumScopsDepthFive
++;
5259 NumScopsDepthLarger
++;
5261 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
5262 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
5264 NumValueWrites
+= ScopStats
.NumValueWrites
;
5265 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
5266 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
5267 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
5268 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
5269 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
5272 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
5273 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5275 if (!SD
.isMaxRegionInScop(*R
))
5278 Function
*F
= R
->getEntry()->getParent();
5279 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5280 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5281 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5282 auto const &DL
= F
->getParent()->getDataLayout();
5283 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5284 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
5285 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5287 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5288 S
= SB
.getScop(); // take ownership of scop object
5290 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5292 ScopDetection::LoopStats Stats
=
5293 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5294 updateLoopCountStatistic(Stats
, S
->getStatistics());
5301 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5303 S
->print(OS
, PollyPrintInstructions
);
5305 OS
<< "Invalid Scop!\n";
5308 char ScopInfoRegionPass::ID
= 0;
5310 Pass
*polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5312 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5313 "Polly - Create polyhedral description of Scops", false,
5315 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5316 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5317 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5318 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5319 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5320 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5321 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5322 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5323 "Polly - Create polyhedral description of Scops", false,
5326 //===----------------------------------------------------------------------===//
5327 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5328 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5329 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5330 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5334 void ScopInfo::recompute() {
5335 RegionToScopMap
.clear();
5336 /// Create polyhedral description of scops for all the valid regions of a
5338 for (auto &It
: SD
) {
5339 Region
*R
= const_cast<Region
*>(It
);
5340 if (!SD
.isMaxRegionInScop(*R
))
5343 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5344 std::unique_ptr
<Scop
> S
= SB
.getScop();
5347 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5348 ScopDetection::LoopStats Stats
=
5349 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5350 updateLoopCountStatistic(Stats
, S
->getStatistics());
5352 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5353 assert(Inserted
&& "Building Scop for the same region twice!");
5358 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5359 FunctionAnalysisManager::Invalidator
&Inv
) {
5360 // Check whether the analysis, all analyses on functions have been preserved
5361 // or anything we're holding references to is being invalidated
5362 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5363 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5364 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5365 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5366 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5367 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5368 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5369 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
// Unique key identifying this analysis to the new pass manager.
AnalysisKey ScopInfoAnalysis::Key;
5374 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5375 FunctionAnalysisManager
&FAM
) {
5376 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5377 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5378 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5379 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5380 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5381 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5382 auto &DL
= F
.getParent()->getDataLayout();
5383 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5384 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5387 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5388 FunctionAnalysisManager
&FAM
) {
5389 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5390 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5391 // order here to keep the output persistent
5392 for (auto &It
: reverse(SI
)) {
5394 It
.second
->print(Stream
, PollyPrintInstructions
);
5396 Stream
<< "Invalid Scop!\n";
5398 return PreservedAnalyses::all();
5401 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5402 AU
.addRequired
<LoopInfoWrapperPass
>();
5403 AU
.addRequired
<RegionInfoPass
>();
5404 AU
.addRequired
<DominatorTreeWrapperPass
>();
5405 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5406 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5407 AU
.addRequired
<AAResultsWrapperPass
>();
5408 AU
.addRequired
<AssumptionCacheTracker
>();
5409 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5410 AU
.setPreservesAll();
5413 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5414 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5415 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5416 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5417 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5418 auto const &DL
= F
.getParent()->getDataLayout();
5419 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5420 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5421 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5423 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5427 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5428 for (auto &It
: *Result
) {
5430 It
.second
->print(OS
, PollyPrintInstructions
);
5432 OS
<< "Invalid Scop!\n";
5436 char ScopInfoWrapperPass::ID
= 0;
5438 Pass
*polly::createScopInfoWrapperPassPass() {
5439 return new ScopInfoWrapperPass();
5442 INITIALIZE_PASS_BEGIN(
5443 ScopInfoWrapperPass
, "polly-function-scops",
5444 "Polly - Create polyhedral description of all Scops of a function", false,
5446 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5447 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5448 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5449 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5450 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5451 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5452 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5453 INITIALIZE_PASS_END(
5454 ScopInfoWrapperPass
, "polly-function-scops",
5455 "Polly - Create polyhedral description of all Scops of a function", false,