//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include <algorithm>
#include <cassert>
#include <string>
106 using namespace llvm
;
107 using namespace polly
;
109 #define DEBUG_TYPE "polly-scops"
111 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
112 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
113 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
114 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
115 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
116 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
117 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
118 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
119 STATISTIC(AssumptionsInvariantLoad
,
120 "Number of invariant loads assumptions taken.");
121 STATISTIC(AssumptionsDelinearization
,
122 "Number of delinearization assumptions taken.");
124 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
125 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
126 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
127 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
129 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
130 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
131 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
132 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
133 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
134 STATISTIC(NumScopsDepthLarger
,
135 "Number of scops with maximal loop depth 6 and larger");
136 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
138 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
140 NumValueWritesInLoops
,
141 "Number of scalar value writes nested in affine loops after ScopInfo");
142 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
143 STATISTIC(NumPHIWritesInLoops
,
144 "Number of scalar phi writes nested in affine loops after ScopInfo");
145 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
146 STATISTIC(NumSingletonWritesInLoops
,
147 "Number of singleton writes nested in affine loops after ScopInfo");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;
166 OptComputeOut("polly-analysis-computeout",
167 cl::desc("Bound the scop analysis by a maximal amount of "
168 "computational steps (0 means no bound)"),
169 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
170 cl::cat(PollyCategory
));
172 static cl::opt
<bool> PollyRemarksMinimal(
173 "polly-remarks-minimal",
174 cl::desc("Do not emit remarks about assumptions that are known"),
175 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
177 // Multiplicative reductions can be disabled separately as these kind of
178 // operations can overflow easily. Additive reductions and bit operations
179 // are in contrast pretty stable.
180 static cl::opt
<bool> DisableMultiplicativeReductions(
181 "polly-disable-multiplicative-reductions",
182 cl::desc("Disable multiplicative reductions"), cl::Hidden
, cl::ZeroOrMore
,
183 cl::init(false), cl::cat(PollyCategory
));
185 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
186 "polly-rtc-max-array-disjuncts",
187 cl::desc("The maximal number of disjunts allowed in memory accesses to "
189 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
191 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
192 "polly-rtc-max-parameters",
193 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
194 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
196 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
197 "polly-rtc-max-arrays-per-group",
198 cl::desc("The maximal number of arrays to compare in each alias group."),
199 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
201 static cl::opt
<std::string
> UserContextStr(
202 "polly-context", cl::value_desc("isl parameter set"),
203 cl::desc("Provide additional constraints on the context parameters"),
204 cl::init(""), cl::cat(PollyCategory
));
206 static cl::opt
<bool> DetectReductions("polly-detect-reductions",
207 cl::desc("Detect and exploit reductions"),
208 cl::Hidden
, cl::ZeroOrMore
,
209 cl::init(true), cl::cat(PollyCategory
));
212 IslOnErrorAbort("polly-on-isl-error-abort",
213 cl::desc("Abort if an isl error is encountered"),
214 cl::init(true), cl::cat(PollyCategory
));
216 static cl::opt
<bool> PollyPreciseInbounds(
217 "polly-precise-inbounds",
218 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
219 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
222 PollyIgnoreInbounds("polly-ignore-inbounds",
223 cl::desc("Do not take inbounds assumptions at all"),
224 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
226 static cl::opt
<bool> PollyIgnoreParamBounds(
227 "polly-ignore-parameter-bounds",
229 "Do not add parameter bounds and do no gist simplify sets accordingly"),
230 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
232 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
233 "polly-allow-dereference-of-all-function-parameters",
235 "Treat all parameters to functions that are pointers as dereferencible."
236 " This is useful for invariant load hoisting, since we can generate"
237 " less runtime checks. This is only valid if all pointers to functions"
238 " are always initialized, so that Polly can choose to hoist"
240 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
242 static cl::opt
<bool> PollyPreciseFoldAccesses(
243 "polly-precise-fold-accesses",
244 cl::desc("Fold memory accesses to model more possible delinearizations "
245 "(does not scale well)"),
246 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
248 bool polly::UseInstructionNames
;
250 static cl::opt
<bool, true> XUseInstructionNames(
251 "polly-use-llvm-names",
252 cl::desc("Use LLVM-IR names when deriving statement names"),
253 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
254 cl::ZeroOrMore
, cl::cat(PollyCategory
));
256 static cl::opt
<bool> PollyPrintInstructions(
257 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
258 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
260 //===----------------------------------------------------------------------===//
262 // Create a sequence of two schedules. Either argument may be null and is
263 // interpreted as the empty schedule. Can also return null if both schedules are
265 static __isl_give isl_schedule
*
266 combineInSequence(__isl_take isl_schedule
*Prev
,
267 __isl_take isl_schedule
*Succ
) {
273 return isl_schedule_sequence(Prev
, Succ
);
276 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
277 int dim
, isl::dim type
) {
279 isl::ctx Ctx
= S
.get_ctx();
281 // The upper and lower bound for a parameter value is derived either from
282 // the data type of the parameter or from the - possibly more restrictive -
284 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
285 S
= S
.lower_bound_val(type
, dim
, V
);
286 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
287 S
= S
.upper_bound_val(type
, dim
, V
);
289 if (Range
.isFullSet())
292 if (isl_set_n_basic_set(S
.get()) > MaxDisjunctsInContext
)
295 // In case of signed wrapping, we can refine the set of valid values by
296 // excluding the part not covered by the wrapping range.
297 if (Range
.isSignWrappedSet()) {
298 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
299 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
301 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
303 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
310 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
311 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
315 if (!S
->contains(BasePtrLI
))
318 ScalarEvolution
&SE
= *S
->getSE();
320 auto *OriginBaseSCEV
=
321 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
325 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
326 if (!OriginBaseSCEVUnknown
)
329 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
333 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
334 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
335 const DataLayout
&DL
, Scop
*S
,
336 const char *BaseName
)
337 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
338 std::string BasePtrName
=
340 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
341 Kind
== MemoryKind::PHI
? "__phi" : "",
342 UseInstructionNames
);
343 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
347 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
348 BasePtrOriginSAI
= nullptr;
352 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
353 if (BasePtrOriginSAI
)
354 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
357 ScopArrayInfo::~ScopArrayInfo() = default;
359 isl::space
ScopArrayInfo::getSpace() const {
360 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
361 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
365 bool ScopArrayInfo::isReadOnly() {
366 isl::union_set WriteSet
= S
.getWrites().range();
367 isl::space Space
= getSpace();
368 WriteSet
= WriteSet
.extract_set(Space
);
370 return bool(WriteSet
.is_empty());
373 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
374 if (Array
->getElementType() != getElementType())
377 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
380 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
381 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
387 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
388 if (NewElementType
== ElementType
)
391 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
392 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
394 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
397 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
398 ElementType
= NewElementType
;
400 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
401 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
405 /// Make the ScopArrayInfo model a Fortran Array
406 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
407 assert(FAD
&& "got invalid Fortran array descriptor");
409 assert(this->FAD
== FAD
&&
410 "receiving different array descriptors for same array");
414 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
418 isl::space
Space(S
.getIslCtx(), 1, 0);
420 std::string param_name
= getName();
421 param_name
+= "_fortranarr_size";
422 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
424 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
426 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
428 DimensionSizesPw
[0] = PwAff
;
431 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
432 bool CheckConsistency
) {
433 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
434 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
435 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
437 if (CheckConsistency
) {
438 for (int i
= 0; i
< SharedDims
; i
++) {
439 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
440 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
441 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
445 if (DimensionSizes
.size() >= NewSizes
.size())
449 DimensionSizes
.clear();
450 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
452 DimensionSizesPw
.clear();
453 for (const SCEV
*Expr
: DimensionSizes
) {
455 DimensionSizesPw
.push_back(nullptr);
458 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
459 DimensionSizesPw
.push_back(Size
);
464 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
466 int ScopArrayInfo::getElemSizeInBytes() const {
467 return DL
.getTypeAllocSize(ElementType
);
470 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
473 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
476 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
477 OS
.indent(8) << *getElementType() << " " << getName();
479 // If this is a Fortran array, then we can print the outermost dimension
480 // as a isl_pw_aff even though there is no SCEV information.
481 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
483 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
484 !getDimensionSize(0)) {
488 for (; u
< getNumberOfDimensions(); u
++) {
492 isl::pw_aff Size
= getDimensionSizePw(u
);
493 OS
<< " " << Size
<< " ";
495 OS
<< *getDimensionSize(u
);
503 if (BasePtrOriginSAI
)
504 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
506 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
509 const ScopArrayInfo
*
510 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
511 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
512 assert(!Id
.is_null() && "Output dimension didn't have an ID");
513 return getFromId(Id
);
516 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
517 void *User
= Id
.get_user();
518 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
522 void MemoryAccess::wrapConstantDimensions() {
523 auto *SAI
= getScopArrayInfo();
524 isl::space ArraySpace
= SAI
->getSpace();
525 isl::ctx Ctx
= ArraySpace
.get_ctx();
526 unsigned DimsArray
= SAI
->getNumberOfDimensions();
528 isl::multi_aff DivModAff
= isl::multi_aff::identity(
529 ArraySpace
.map_from_domain_and_range(ArraySpace
));
530 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
532 // Begin with last dimension, to iteratively carry into higher dimensions.
533 for (int i
= DimsArray
- 1; i
> 0; i
--) {
534 auto *DimSize
= SAI
->getDimensionSize(i
);
535 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
537 // This transformation is not applicable to dimensions with dynamic size.
541 // This transformation is not applicable to dimensions of size zero.
542 if (DimSize
->isZero())
545 isl::val DimSizeVal
=
546 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
547 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
549 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
551 // Compute: index % size
552 // Modulo must apply in the divide of the previous iteration, if any.
553 isl::aff Modulo
= Var
.mod(DimSizeVal
);
554 Modulo
= Modulo
.pullback(DivModAff
);
556 // Compute: floor(index / size)
557 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
558 Divide
= Divide
.floor();
559 Divide
= Divide
.add(PrevVar
);
560 Divide
= Divide
.pullback(DivModAff
);
562 // Apply Modulo and Divide.
563 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
564 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
567 // Apply all modulo/divides on the accesses.
568 isl::map Relation
= AccessRelation
;
569 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
570 Relation
= Relation
.detect_equalities();
571 AccessRelation
= Relation
;
574 void MemoryAccess::updateDimensionality() {
575 auto *SAI
= getScopArrayInfo();
576 isl::space ArraySpace
= SAI
->getSpace();
577 isl::space AccessSpace
= AccessRelation
.get_space().range();
578 isl::ctx Ctx
= ArraySpace
.get_ctx();
580 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
581 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
582 auto DimsMissing
= DimsArray
- DimsAccess
;
584 auto *BB
= getStatement()->getEntryBlock();
585 auto &DL
= BB
->getModule()->getDataLayout();
586 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
587 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
589 isl::map Map
= isl::map::from_domain_and_range(
590 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
592 for (unsigned i
= 0; i
< DimsMissing
; i
++)
593 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
595 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
596 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
598 AccessRelation
= AccessRelation
.apply_range(Map
);
600 // For the non delinearized arrays, divide the access function of the last
601 // subscript by the size of the elements in the array.
603 // A stride one array access in C expressed as A[i] is expressed in
604 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
605 // two subsequent values of 'i' index two values that are stored next to
606 // each other in memory. By this division we make this characteristic
607 // obvious again. If the base pointer was accessed with offsets not divisible
608 // by the accesses element size, we will have chosen a smaller ArrayElemSize
609 // that divides the offsets of all accesses to this base pointer.
610 if (DimsAccess
== 1) {
611 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
612 AccessRelation
= AccessRelation
.floordiv_val(V
);
615 // We currently do this only if we added at least one dimension, which means
616 // some dimension's indices have not been specified, an indicator that some
617 // index values have been added together.
618 // TODO: Investigate general usefulness; Effect on unit tests is to make index
619 // expressions more complicated.
621 wrapConstantDimensions();
624 computeBoundsOnAccessRelation(ArrayElemSize
);
626 // Introduce multi-element accesses in case the type loaded by this memory
627 // access is larger than the canonical element type of the array.
629 // An access ((float *)A)[i] to an array char *A is modeled as
630 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
631 if (ElemBytes
> ArrayElemSize
) {
632 assert(ElemBytes
% ArrayElemSize
== 0 &&
633 "Loaded element size should be multiple of canonical element size");
634 isl::map Map
= isl::map::from_domain_and_range(
635 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
636 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
637 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
642 LS
= isl::local_space(Map
.get_space());
643 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
645 C
= isl::constraint::alloc_inequality(LS
);
646 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
647 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
648 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
649 Map
= Map
.add_constraint(C
);
651 C
= isl::constraint::alloc_inequality(LS
);
652 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
653 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
654 C
= C
.set_constant_val(isl::val(Ctx
, 0));
655 Map
= Map
.add_constraint(C
);
656 AccessRelation
= AccessRelation
.apply_range(Map
);
661 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
663 case MemoryAccess::RT_NONE
:
664 llvm_unreachable("Requested a reduction operator string for a memory "
665 "access which isn't a reduction");
666 case MemoryAccess::RT_ADD
:
668 case MemoryAccess::RT_MUL
:
670 case MemoryAccess::RT_BOR
:
672 case MemoryAccess::RT_BXOR
:
674 case MemoryAccess::RT_BAND
:
677 llvm_unreachable("Unknown reduction type");
680 /// Return the reduction type for a given binary operator.
681 static MemoryAccess::ReductionType
getReductionType(const BinaryOperator
*BinOp
,
682 const Instruction
*Load
) {
684 return MemoryAccess::RT_NONE
;
685 switch (BinOp
->getOpcode()) {
686 case Instruction::FAdd
:
687 if (!BinOp
->hasUnsafeAlgebra())
688 return MemoryAccess::RT_NONE
;
690 case Instruction::Add
:
691 return MemoryAccess::RT_ADD
;
692 case Instruction::Or
:
693 return MemoryAccess::RT_BOR
;
694 case Instruction::Xor
:
695 return MemoryAccess::RT_BXOR
;
696 case Instruction::And
:
697 return MemoryAccess::RT_BAND
;
698 case Instruction::FMul
:
699 if (!BinOp
->hasUnsafeAlgebra())
700 return MemoryAccess::RT_NONE
;
702 case Instruction::Mul
:
703 if (DisableMultiplicativeReductions
)
704 return MemoryAccess::RT_NONE
;
705 return MemoryAccess::RT_MUL
;
707 return MemoryAccess::RT_NONE
;
711 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
712 isl::id ArrayId
= getArrayId();
713 void *User
= ArrayId
.get_user();
714 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
718 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
719 isl::id ArrayId
= getLatestArrayId();
720 void *User
= ArrayId
.get_user();
721 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
725 isl::id
MemoryAccess::getOriginalArrayId() const {
726 return AccessRelation
.get_tuple_id(isl::dim::out
);
729 isl::id
MemoryAccess::getLatestArrayId() const {
730 if (!hasNewAccessRelation())
731 return getOriginalArrayId();
732 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
735 isl::map
MemoryAccess::getAddressFunction() const {
736 return getAccessRelation().lexmin();
740 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
741 isl::map Schedule
, ScheduledAccRel
;
742 isl::union_set UDomain
;
744 UDomain
= getStatement()->getDomain();
745 USchedule
= USchedule
.intersect_domain(UDomain
);
746 Schedule
= isl::map::from_union_map(USchedule
);
747 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
748 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
751 isl::map
MemoryAccess::getOriginalAccessRelation() const {
752 return AccessRelation
;
755 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
756 return stringFromIslObj(AccessRelation
.get());
759 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
760 return AccessRelation
.get_space();
763 isl::map
MemoryAccess::getNewAccessRelation() const {
764 return NewAccessRelation
;
767 std::string
MemoryAccess::getNewAccessRelationStr() const {
768 return stringFromIslObj(NewAccessRelation
.get());
771 std::string
MemoryAccess::getAccessRelationStr() const {
772 return getAccessRelation().to_str();
775 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
776 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
777 Space
= Space
.align_params(Statement
->getDomainSpace());
779 return isl::basic_map::from_domain_and_range(
780 isl::basic_set::universe(Statement
->getDomainSpace()),
781 isl::basic_set::universe(Space
));
784 // Formalize no out-of-bound access assumption
786 // When delinearizing array accesses we optimistically assume that the
787 // delinearized accesses do not access out of bound locations (the subscript
788 // expression of each array evaluates for each statement instance that is
789 // executed to a value that is larger than zero and strictly smaller than the
790 // size of the corresponding dimension). The only exception is the outermost
791 // dimension for which we do not need to assume any upper bound. At this point
792 // we formalize this assumption to ensure that at code generation time the
793 // relevant run-time checks can be generated.
795 // To find the set of constraints necessary to avoid out of bound accesses, we
796 // first build the set of data locations that are not within array bounds. We
797 // then apply the reverse access relation to obtain the set of iterations that
798 // may contain invalid accesses and reduce this set of iterations to the ones
799 // that are actually executed by intersecting them with the domain of the
800 // statement. If we now project out all loop dimensions, we obtain a set of
801 // parameters that may cause statement instances to be executed that may
802 // possibly yield out of bound memory accesses. The complement of these
803 // constraints is the set of constraints that needs to be assumed to ensure such
804 // statement instances are never executed.
805 void MemoryAccess::assumeNoOutOfBound() {
806 if (PollyIgnoreInbounds
)
808 auto *SAI
= getScopArrayInfo();
809 isl::space Space
= getOriginalAccessRelationSpace().range();
810 isl::set Outside
= isl::set::empty(Space
);
811 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
812 isl::local_space
LS(Space
);
813 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
814 isl::pw_aff Zero
= isl::pw_aff(LS
);
816 isl::set DimOutside
= Var
.lt_set(Zero
);
817 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
818 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
819 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
820 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
822 Outside
= Outside
.unite(DimOutside
);
825 Outside
= Outside
.apply(getAccessRelation().reverse());
826 Outside
= Outside
.intersect(Statement
->getDomain());
827 Outside
= Outside
.params();
829 // Remove divs to avoid the construction of overly complicated assumptions.
830 // Doing so increases the set of parameter combinations that are assumed to
831 // not appear. This is always save, but may make the resulting run-time check
832 // bail out more often than strictly necessary.
833 Outside
= Outside
.remove_divs();
834 Outside
= Outside
.complement();
835 const auto &Loc
= getAccessInstruction()
836 ? getAccessInstruction()->getDebugLoc()
838 if (!PollyPreciseInbounds
)
839 Outside
= Outside
.gist_params(Statement
->getDomain().params());
840 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
.release(), Loc
,
844 void MemoryAccess::buildMemIntrinsicAccessRelation() {
845 assert(isMemoryIntrinsic());
846 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
848 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
849 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
852 if (Subscripts
[1] == nullptr) {
853 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
855 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
856 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
857 isl::space RangeSpace
= LengthMap
.get_space().range();
858 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
860 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
861 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
862 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
863 LengthMap
= LengthMap
.sum(SubscriptMap
);
865 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
/// Tighten the access relation using the signed range that ScalarEvolution
/// can prove for the access's pointer expression.
///
/// The pointer SCEV (made relative to its base pointer, when one can be
/// computed) is divided by @p ElementSize to obtain index bounds, which are
/// then intersected with the range of the access relation. Bails out
/// whenever SCEV cannot provide a useful (non-full, non-wrapping) range.
void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
  ScalarEvolution *SE = Statement->getParent()->getSE();

  auto MAI = MemAccInst(getAccessInstruction());
  // Memory intrinsics are modeled elsewhere; no per-element bounds here.
  if (isa<MemIntrinsic>(MAI))
    return;

  Value *Ptr = MAI.getPointerOperand();
  if (!Ptr || !SE->isSCEVable(Ptr->getType()))
    return;

  auto *PtrSCEV = SE->getSCEV(Ptr);
  if (isa<SCEVCouldNotCompute>(PtrSCEV))
    return;

  // Express the pointer as an offset from its base so the derived range
  // bounds the array index rather than the absolute address.
  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
  if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
    PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);

  const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
  if (Range.isFullSet())
    return;

  if (Range.isWrappedSet() || Range.isSignWrappedSet())
    return;

  bool isWrapping = Range.isSignWrappedSet();
  unsigned BW = Range.getBitWidth();
  const auto One = APInt(BW, 1);
  // For a wrapped range the raw Lower/Upper endpoints are used (Upper is
  // exclusive, hence the -1); otherwise the signed extrema are exact.
  const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
  const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();

  // Scale byte offsets down to element indices; Max is exclusive (+1).
  auto Min = LB.sdiv(APInt(BW, ElementSize));
  auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
  assert(Min.sle(Max) && "Minimum expected to be less or equal than max");

  isl::map Relation = AccessRelation;
  isl::set AccessRange = Relation.range();
  AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
                                    isl::dim::set);
  AccessRelation = Relation.intersect_range(AccessRange);
}
/// Fold (delinearize-normalize) the access relation for arrays whose
/// dimension sizes are parametric.
///
/// For each inner dimension pair (i, i+1), subscripts with a negative value
/// in dimension i+1 are rewritten by borrowing one from dimension i and
/// adding the (parametric) size of dimension i+1, so that all subscripts end
/// up non-negative. The folded relation is only kept if it does not increase
/// the number of disjuncts (unless PollyPreciseFoldAccesses is set).
void MemoryAccess::foldAccessRelation() {
  // Nothing to fold for flat accesses or constant-sized dimensions.
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    // Parametric size of dimension i+1; its parameter id is looked up below.
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    isl::id ParamId =
        give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    // MapOne: identity for subscripts with a non-negative dimension i+1.
    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    // MapTwo: for a negative dimension i+1, borrow one from dimension i
    // (out_i = in_i - 1) and add the dimension size (out_{i+1} =
    // in_{i+1} + DimSize); all other dimensions stay equal.
    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    // in_i - out_i - 1 = 0  <=>  out_i = in_i - 1.
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    // in_{i+1} - out_{i+1} + DimSize = 0  <=>  out_{i+1} = in_{i+1} + DimSize.
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    // MapTwo only applies when dimension i+1 is negative.
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      isl_map_n_basic_map(NewAccessRelation.get()) >
          isl_map_n_basic_map(AccessRelation.get())) {
    // Keep the unfolded relation; folding made it more complex.
  } else {
    AccessRelation = NewAccessRelation;
  }
}
/// Check if @p Expr is divisible by @p Size.
///
/// Recurses into SCEV expression trees: for multiplications one divisible
/// factor suffices; for all other n-ary nodes every operand must be
/// divisible. Leaves fall back to the algebraic check
/// (Expr udiv Size) * Size == Expr, which holds exactly when Size divides
/// Expr.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  // As a last resort, check (Expr / Size) * Size == Expr; equality of the
  // uniqued SCEV pointers implies exact divisibility.
  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
/// Build the polyhedral access relation for this access into array @p SAI.
///
/// Memory intrinsics get their relation from
/// buildMemIntrinsicAccessRelation(); non-affine accesses are
/// overapproximated by an access to the whole array; affine accesses get one
/// output dimension per subscript expression.
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference, if an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  // One output dimension per subscript, appended left-to-right.
  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}
/// Construct a MemoryAccess for an instruction-based access.
///
/// Captures the accessed base address, element type, subscripts and sizes;
/// the actual access relation is built later by buildAccessRelation(). The
/// isl id encodes the access type and the statement's current access count
/// so it is unique within the statement.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  // Indexed by AccessType; yields e.g. "_Read" for READ accesses.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
/// Construct a MemoryAccess directly from an access relation @p AccRel.
///
/// Used for synthesized (e.g. copy-statement) accesses that have no
/// underlying LLVM instruction: the accessed array is recovered from the
/// relation's output tuple id, and element type, base pointer and dimension
/// sizes are taken from the corresponding ScopArrayInfo.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  // Dimension 0 has no size expression by convention.
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  // Indexed by AccessType; yields e.g. "_Read" for READ accesses.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
// All members are RAII-managed (isl C++ wrappers, vectors); nothing to free.
MemoryAccess::~MemoryAccess() = default;
/// Simplify the access's sets with respect to the SCoP's parameter context,
/// dropping constraints already implied by it.
void MemoryAccess::realignParams() {
  isl::set Ctx = Statement->getParent()->getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  AccessRelation = AccessRelation.gist_params(Ctx);
}
/// Return the textual form of this access's reduction operator.
const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}
/// Return the isl id that uniquely identifies this access.
isl::id MemoryAccess::getId() const { return Id; }
/// Print a reduction type; RT_NONE prints a fixed marker, every other type
/// prints its operator string.
raw_ostream &polly::operator<<(raw_ostream &OS,
                               MemoryAccess::ReductionType RT) {
  if (RT == MemoryAccess::RT_NONE)
    OS << "NONE";
  else
    OS << MemoryAccess::getReductionOperatorStr(RT);
  return OS;
}
/// Attach the Fortran array descriptor value backing this access.
void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
/// Print a human-readable description of this access: its kind header,
/// reduction type, optional Fortran descriptor, scalar flag, and the
/// original (plus, if set, the new) access relation.
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger helper: print this access to stderr.
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif
/// Translate SCEV @p E into a piecewise affine function over the statement's
/// domain.
///
/// Side effect: the iterations for which the translation is invalid
/// (PWAC.second), restricted to the statement domain, are accumulated into
/// this access's InvalidDomain.
isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  // Drop the tuple id so the domain matches the pw_aff's anonymous space.
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(isl::manage(PWAC.second));
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return isl::manage(PWAC.first);
}
// Create a map in the size of the provided set domain, that maps from the
// one element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strict smaller
// than the last dimension of the output.
//
//   getEqualAndLarger(set[i0, i1, ..., iX]):
//
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strict smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
  return Map;
}
/// Compute the set of strides between accesses of consecutive schedule
/// points.
///
/// Maps each scheduled iteration to its lexicographic successor, pulls both
/// sides through the access relation, and returns the per-dimension deltas
/// between the accessed elements of successive iterations.
isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  // Keep only the immediate (lexicographically smallest larger) successor.
  NextScatt = NextScatt.lexmin();

  // Turn schedule-point -> next-schedule-point into
  // accessed-element -> next-accessed-element.
  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}
/// Check whether all consecutive-iteration strides equal @p StrideWidth in
/// the innermost dimension (and zero in every other dimension).
bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
  isl::set Stride, StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  // Build the reference stride vector (0, ..., 0, StrideWidth).
  StrideX = isl::set::universe(Stride.get_space());
  for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
    StrideX = StrideX.fix_si(isl::dim::set, i, 0);
  StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
                           StrideWidth);
  IsStrideX = Stride.is_subset(StrideX);

  return IsStrideX;
}
/// True if successive iterations always access the same element.
bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}
/// True if successive iterations access consecutive elements.
bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}
/// Overwrite the original access relation (no validation is performed here,
/// unlike setNewAccessRelation).
void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}
/// Install @p NewAccess as the transformed (new) access relation.
///
/// In assert builds this validates the relation: the domain space must match
/// the statement, reads must cover every statement instance (no partial
/// READs), the target array must be a registered ScopArrayInfo with matching
/// dimensionality, and indirect-array bases must have a hoisted invariant
/// base pointer.
void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain =
        StmtDomain.intersect_params(getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to number of dimensions of the
  // accessed array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}
/// Return true if the latest access relation does not cover the whole
/// statement domain, i.e. the access is partial.
///
/// Note: uses the raw isl C API and compares against isl_bool_false, so an
/// isl error (isl_bool_error) is reported as "partial" here.
bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
}
1291 //===----------------------------------------------------------------------===//
1293 isl::map
ScopStmt::getSchedule() const {
1294 isl::set Domain
= getDomain();
1295 if (Domain
.is_empty())
1296 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1297 auto Schedule
= getParent()->getSchedule();
1300 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1301 if (Schedule
.is_empty())
1302 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1303 isl::map M
= M
.from_union_map(Schedule
);
1305 M
= M
.gist_domain(Domain
);
/// Shrink the statement's domain to @p NewDomain, which must be a subset of
/// the current domain.
void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}
/// Build the access relation for every MemoryAccess of this statement and
/// register each with its (possibly newly created) ScopArrayInfo.
void ScopStmt::buildAccessRelations() {
  Scop &S = *getParent();
  for (MemoryAccess *Access : MemAccs) {
    Type *ElementType = Access->getElementType();

    // Map the access classification onto the array kind used for SAI lookup.
    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                           ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    S.addAccessData(Access);
  }
}
/// Add @p Access to this statement's access list and to the lookup table
/// matching its kind (array, value read/write, PHI read/write).
///
/// @param Prepend If set, the access is inserted at the front of MemAccs
///                instead of appended.
void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    // Multiple array accesses may map to one instruction; keep them as a
    // list, newest first.
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}
/// Simplify this statement's sets — and those of all its accesses — with
/// respect to the SCoP's parameter context.
void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  isl::set Ctx = Parent.getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  Domain = Domain.gist_params(Ctx);
}
/// Add @p BSet to the set @p User if @p BSet is bounded.
///
/// isl_set_foreach_basic_set callback: @p User points at the accumulator
/// isl_set. Ownership of @p BSet is consumed either by the union or freed.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}
/// Return the bounded parts of @p S.
///
/// Unions all bounded basic sets of @p S; consumes @p S.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded
///          subset, both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
  // Only the upper bound matters here, so force a lower bound of 0 on every
  // dimension before testing boundedness.
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  for (unsigned u = 0; u < Dim; u++) {
    // p_u - s_u >= 0, i.e. dimension u is bounded by a fresh parameter.
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}
/// Set the dimension Ids from @p From in @p To.
///
/// Copies each set-dimension id of @p From onto the corresponding dimension
/// of @p To; @p From is only inspected, @p To is consumed and returned.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Unsigned predicates reuse the signed isl comparisons; callers are
/// responsible for establishing the non-negativity assumptions that make
/// this valid (see the unsigned handling in buildConditionSets).
/// Consumes @p L and @p R.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return isl_pw_aff_eq_set(L, R);
  case ICmpInst::ICMP_NE:
    return isl_pw_aff_ne_set(L, R);
  case ICmpInst::ICMP_SLT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_SLE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_SGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_SGE:
    return isl_pw_aff_ge_set(L, R);
  case ICmpInst::ICMP_ULT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_UGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_ULE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_UGE:
    return isl_pw_aff_ge_set(L, R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Helper function that will make sure the dimensions of the result have the
/// same isl_id's as the @p Domain.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R,
                                             __isl_keep isl_set *Domain) {
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
  return setDimensionIds(Domain, ConsequenceCondSet);
}
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  // Fold the invalid iterations of this translation into BB's invalid domain.
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(isl::manage(PWAC.second));
  return PWAC.first;
}
/// Build the conditions sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors. Each case successor gets
/// "condition == case-value" intersected with @p Domain; the default
/// successor (index 0) gets the remainder of @p Domain.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  // The default edge is taken exactly when no case condition holds.
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = setDimensionIds(
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));

  isl_pw_aff_free(LHS);

  return true;
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
static __isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  ConsequenceCondSet = setDimensionIds(Domain, ConsequenceCondSet);
  return ConsequenceCondSet;
}
/// Build the conditions sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
///
/// Returns false (without populating @p ConditionSets) if the resulting sets
/// exceed MaxDisjunctsInDomain, in which case the SCoP is invalidated.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                   TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;
  if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    // Constant condition: branch is always-false or always-true.
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    // Recurse into both operands; each recursion pushes a (consequence,
    // alternative) pair onto ConditionSets.
    bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    // Pop the two pairs; only the consequence parts are combined, the
    // alternative parts are recomputed below.
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    ScalarEvolution &SE = *S.getSE();
    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      // a >u b is modeled as b <u a (operands swapped).
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet =
          buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    // Too many disjuncts would make later run-time checks and compilation
    // too expensive; give up on this SCoP.
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                 TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. Dispatches to the
/// SwitchInst overload for switches and to the condition-value overload for
/// conditional branches.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  // Unconditional branch: the single successor inherits the whole domain.
  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
/// Fetch this statement's iteration domain from the SCoP and tag it with an
/// isl id that points back at this ScopStmt.
void ScopStmt::buildDomain() {
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);

  Domain = getParent()->getDomainConditions(this);
  Domain = Domain.set_tuple_id(Id);
}
/// Populate NestLoops with the Loop* stored in each domain dimension's id
/// (one surrounding loop per iteration-space dimension, outermost first).
void ScopStmt::collectSurroundingLoops() {
  for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
    isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
    NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
  }
}
/// Construct a region statement (a statement covering a whole non-affine
/// region @p R).
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), SurroundingLoop(SurroundingLoop) {
  BaseName = getIslCompatibleName(
      "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
}
/// Construct a block statement covering @p bb with the given list of modeled
/// @p Instructions.
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
                   std::vector<Instruction *> Instructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      Build(nullptr), SurroundingLoop(SurroundingLoop),
      Instructions(Instructions) {
  BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), "",
                                  UseInstructionNames);
}
/// Construct a copy statement that reads via @p SourceRel and writes via
/// @p TargetRel over @p NewDomain.
///
/// Synthesizes one MUST_WRITE and one READ MemoryAccess (no underlying IR
/// instruction) and registers both with the statement and the SCoP.
ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  MemoryAccess *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}
1794 ScopStmt::~ScopStmt() = default;
1796 void ScopStmt::init(LoopInfo
&LI
) {
1797 assert(!Domain
&& "init must be called only once");
1800 collectSurroundingLoops();
1801 buildAccessRelations();
1803 if (DetectReductions
)
1804 checkForReductions();
1807 /// Collect loads which might form a reduction chain with @p StoreMA.
1809 /// Check if the stored value for @p StoreMA is a binary operator with one or
1810 /// two loads as operands. If the binary operand is commutative & associative,
1811 /// used only once (by @p StoreMA) and its load operands are also used only
1812 /// once, we have found a possible reduction chain. It starts at an operand
1813 /// load and includes the binary operator and @p StoreMA.
1815 /// Note: We allow only one use to ensure the load and binary operator cannot
1816 /// escape this block or into any other store except @p StoreMA.
1817 void ScopStmt::collectCandiateReductionLoads(
1818 MemoryAccess
*StoreMA
, SmallVectorImpl
<MemoryAccess
*> &Loads
) {
1819 auto *Store
= dyn_cast
<StoreInst
>(StoreMA
->getAccessInstruction());
1823 // Skip if there is not one binary operator between the load and the store
1824 auto *BinOp
= dyn_cast
<BinaryOperator
>(Store
->getValueOperand());
1828 // Skip if the binary operators has multiple uses
1829 if (BinOp
->getNumUses() != 1)
1832 // Skip if the opcode of the binary operator is not commutative/associative
1833 if (!BinOp
->isCommutative() || !BinOp
->isAssociative())
1836 // Skip if the binary operator is outside the current SCoP
1837 if (BinOp
->getParent() != Store
->getParent())
1840 // Skip if it is a multiplicative reduction and we disabled them
1841 if (DisableMultiplicativeReductions
&&
1842 (BinOp
->getOpcode() == Instruction::Mul
||
1843 BinOp
->getOpcode() == Instruction::FMul
))
1846 // Check the binary operator operands for a candidate load
1847 auto *PossibleLoad0
= dyn_cast
<LoadInst
>(BinOp
->getOperand(0));
1848 auto *PossibleLoad1
= dyn_cast
<LoadInst
>(BinOp
->getOperand(1));
1849 if (!PossibleLoad0
&& !PossibleLoad1
)
1852 // A load is only a candidate if it cannot escape (thus has only this use)
1853 if (PossibleLoad0
&& PossibleLoad0
->getNumUses() == 1)
1854 if (PossibleLoad0
->getParent() == Store
->getParent())
1855 Loads
.push_back(&getArrayAccessFor(PossibleLoad0
));
1856 if (PossibleLoad1
&& PossibleLoad1
->getNumUses() == 1)
1857 if (PossibleLoad1
->getParent() == Store
->getParent())
1858 Loads
.push_back(&getArrayAccessFor(PossibleLoad1
));
1861 /// Check for reductions in this ScopStmt.
1863 /// Iterate over all store memory accesses and check for valid binary reduction
1864 /// like chains. For all candidates we check if they have the same base address
1865 /// and there are no other accesses which overlap with them. The base address
1866 /// check rules out impossible reductions candidates early. The overlap check,
1867 /// together with the "only one user" check in collectCandiateReductionLoads,
1868 /// guarantees that none of the intermediate results will escape during
1869 /// execution of the loop nest. We basically check here that no other memory
1870 /// access can access the same memory as the potential reduction.
1871 void ScopStmt::checkForReductions() {
1872 SmallVector
<MemoryAccess
*, 2> Loads
;
1873 SmallVector
<std::pair
<MemoryAccess
*, MemoryAccess
*>, 4> Candidates
;
1875 // First collect candidate load-store reduction chains by iterating over all
1876 // stores and collecting possible reduction loads.
1877 for (MemoryAccess
*StoreMA
: MemAccs
) {
1878 if (StoreMA
->isRead())
1882 collectCandiateReductionLoads(StoreMA
, Loads
);
1883 for (MemoryAccess
*LoadMA
: Loads
)
1884 Candidates
.push_back(std::make_pair(LoadMA
, StoreMA
));
1887 // Then check each possible candidate pair.
1888 for (const auto &CandidatePair
: Candidates
) {
1890 isl::map LoadAccs
= CandidatePair
.first
->getAccessRelation();
1891 isl::map StoreAccs
= CandidatePair
.second
->getAccessRelation();
1893 // Skip those with obviously unequal base addresses.
1894 if (!LoadAccs
.has_equal_space(StoreAccs
)) {
1898 // And check if the remaining for overlap with other memory accesses.
1899 isl::map AllAccsRel
= LoadAccs
.unite(StoreAccs
);
1900 AllAccsRel
= AllAccsRel
.intersect_domain(getDomain());
1901 isl::set AllAccs
= AllAccsRel
.range();
1903 for (MemoryAccess
*MA
: MemAccs
) {
1904 if (MA
== CandidatePair
.first
|| MA
== CandidatePair
.second
)
1907 isl::map AccRel
= MA
->getAccessRelation().intersect_domain(getDomain());
1908 isl::set Accs
= AccRel
.range();
1910 if (AllAccs
.has_equal_space(Accs
)) {
1911 isl::set OverlapAccs
= Accs
.intersect(AllAccs
);
1912 Valid
= Valid
&& OverlapAccs
.is_empty();
1919 const LoadInst
*Load
=
1920 dyn_cast
<const LoadInst
>(CandidatePair
.first
->getAccessInstruction());
1921 MemoryAccess::ReductionType RT
=
1922 getReductionType(dyn_cast
<BinaryOperator
>(Load
->user_back()), Load
);
1924 // If no overlapping access was found we mark the load and store as
1926 CandidatePair
.first
->markAsReductionLike(RT
);
1927 CandidatePair
.second
->markAsReductionLike(RT
);
1931 std::string
ScopStmt::getDomainStr() const { return Domain
.to_str(); }
1933 std::string
ScopStmt::getScheduleStr() const {
1934 auto *S
= getSchedule().release();
1937 auto Str
= stringFromIslObj(S
);
1942 void ScopStmt::setInvalidDomain(isl::set ID
) { InvalidDomain
= ID
; }
1944 BasicBlock
*ScopStmt::getEntryBlock() const {
1946 return getBasicBlock();
1947 return getRegion()->getEntry();
1950 unsigned ScopStmt::getNumIterators() const { return NestLoops
.size(); }
1952 const char *ScopStmt::getBaseName() const { return BaseName
.c_str(); }
1954 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1955 return NestLoops
[Dimension
];
1958 isl_ctx
*ScopStmt::getIslCtx() const { return Parent
.getIslCtx(); }
1960 isl::set
ScopStmt::getDomain() const { return Domain
; }
1962 isl::space
ScopStmt::getDomainSpace() const { return Domain
.get_space(); }
1964 isl::id
ScopStmt::getDomainId() const { return Domain
.get_tuple_id(); }
1966 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1967 OS
<< "Instructions {\n";
1969 for (Instruction
*Inst
: Instructions
)
1970 OS
.indent(16) << *Inst
<< "\n";
1972 OS
.indent(12) << "}\n";
1975 void ScopStmt::print(raw_ostream
&OS
, bool PrintInstructions
) const {
1976 OS
<< "\t" << getBaseName() << "\n";
1977 OS
.indent(12) << "Domain :=\n";
1980 OS
.indent(16) << getDomainStr() << ";\n";
1982 OS
.indent(16) << "n/a\n";
1984 OS
.indent(12) << "Schedule :=\n";
1987 OS
.indent(16) << getScheduleStr() << ";\n";
1989 OS
.indent(16) << "n/a\n";
1991 for (MemoryAccess
*Access
: MemAccs
)
1994 if (PrintInstructions
&& isBlockStmt())
1995 printInstructions(OS
.indent(12));
1998 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1999 LLVM_DUMP_METHOD
void ScopStmt::dump() const { print(dbgs(), true); }
2002 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
2003 if (MA
->isRead() && MA
->isOriginalValueKind()) {
2004 bool Found
= ValueReads
.erase(MA
->getAccessValue());
2006 assert(Found
&& "Expected access data not found");
2008 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
2009 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
2011 assert(Found
&& "Expected access data not found");
2013 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
2014 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
2016 assert(Found
&& "Expected access data not found");
2018 if (MA
->isRead() && MA
->isOriginalAnyPHIKind()) {
2019 bool Found
= PHIReads
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
2021 assert(Found
&& "Expected access data not found");
2025 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
2026 // Remove the memory accesses from this statement together with all scalar
2027 // accesses that were caused by it. MemoryKind::Value READs have no access
2028 // instruction, hence would not be removed by this function. However, it is
2029 // only used for invariant LoadInst accesses, its arguments are always affine,
2030 // hence synthesizable, and therefore there are no MemoryKind::Value READ
2031 // accesses to be removed.
2032 auto Predicate
= [&](MemoryAccess
*Acc
) {
2033 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
2035 for (auto *MA
: MemAccs
) {
2036 if (Predicate(MA
)) {
2037 removeAccessData(MA
);
2038 Parent
.removeAccessData(MA
);
2041 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
2043 InstructionToAccess
.erase(MA
->getAccessInstruction());
2046 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
) {
2047 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
2048 assert(MAIt
!= MemAccs
.end());
2049 MemAccs
.erase(MAIt
);
2051 removeAccessData(MA
);
2052 Parent
.removeAccessData(MA
);
2054 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
2055 if (It
!= InstructionToAccess
.end()) {
2056 It
->second
.remove(MA
);
2057 if (It
->second
.empty())
2058 InstructionToAccess
.erase(MA
->getAccessInstruction());
2062 MemoryAccess
*ScopStmt::ensureValueRead(Value
*V
) {
2063 MemoryAccess
*Access
= lookupInputAccessOf(V
);
2067 ScopArrayInfo
*SAI
=
2068 Parent
.getOrCreateScopArrayInfo(V
, V
->getType(), {}, MemoryKind::Value
);
2069 Access
= new MemoryAccess(this, nullptr, MemoryAccess::READ
, V
, V
->getType(),
2070 true, {}, {}, V
, MemoryKind::Value
);
2071 Parent
.addAccessFunction(Access
);
2072 Access
->buildAccessRelation(SAI
);
2074 Parent
.addAccessData(Access
);
2078 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const ScopStmt
&S
) {
2079 S
.print(OS
, PollyPrintInstructions
);
//===----------------------------------------------------------------------===//
/// Scop class implementation.
2086 void Scop::setContext(__isl_take isl_set
*NewContext
) {
2087 NewContext
= isl_set_align_params(NewContext
, isl_set_get_space(Context
));
2088 isl_set_free(Context
);
2089 Context
= NewContext
;
2094 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
2095 struct SCEVSensitiveParameterRewriter
2096 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
2097 const ValueToValueMap
&VMap
;
2100 SCEVSensitiveParameterRewriter(const ValueToValueMap
&VMap
,
2101 ScalarEvolution
&SE
)
2102 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
2104 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
2105 const ValueToValueMap
&VMap
) {
2106 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
2107 return SSPR
.visit(E
);
2110 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
2111 auto *Start
= visit(E
->getStart());
2112 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
2113 visit(E
->getStepRecurrence(SE
)),
2114 E
->getLoop(), SCEV::FlagAnyWrap
);
2115 return SE
.getAddExpr(Start
, AddRec
);
2118 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
2119 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
2120 return SE
.getUnknown(NewValue
);
2125 /// Check whether we should remap a SCEV expression.
2126 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
2127 const ValueToValueMap
&VMap
;
2128 bool FoundInside
= false;
2132 SCEVFindInsideScop(const ValueToValueMap
&VMap
, ScalarEvolution
&SE
,
2134 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
2136 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
2137 const ValueToValueMap
&VMap
, const Scop
*S
) {
2138 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
2140 return SFIS
.FoundInside
;
2143 bool follow(const SCEV
*E
) {
2144 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
2145 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
2146 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
2147 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
2148 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
2150 return !FoundInside
;
2153 bool isDone() { return FoundInside
; }
2156 } // end anonymous namespace
2158 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) const {
2159 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
2160 // doesn't like addition between an AddRec and an expression that
2161 // doesn't have a dominance relationship with it.)
2162 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
2166 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
2169 // This table of function names is used to translate parameter names in more
2170 // human-readable names. This makes it easier to interpret Polly analysis
2172 StringMap
<std::string
> KnownNames
= {
2173 {"_Z13get_global_idj", "global_id"},
2174 {"_Z12get_local_idj", "local_id"},
2175 {"_Z15get_global_sizej", "global_size"},
2176 {"_Z14get_local_sizej", "local_size"},
2177 {"_Z12get_work_dimv", "work_dim"},
2178 {"_Z17get_global_offsetj", "global_offset"},
2179 {"_Z12get_group_idj", "group_id"},
2180 {"_Z14get_num_groupsj", "num_groups"},
2183 static std::string
getCallParamName(CallInst
*Call
) {
2185 raw_string_ostream
OS(Result
);
2186 std::string Name
= Call
->getCalledFunction()->getName();
2188 auto Iterator
= KnownNames
.find(Name
);
2189 if (Iterator
!= KnownNames
.end())
2190 Name
= "__" + Iterator
->getValue();
2192 for (auto &Operand
: Call
->arg_operands()) {
2193 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
2194 OS
<< "_" << Op
->getValue();
2200 void Scop::createParameterId(const SCEV
*Parameter
) {
2201 assert(Parameters
.count(Parameter
));
2202 assert(!ParameterIds
.count(Parameter
));
2204 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
2206 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
2207 Value
*Val
= ValueParameter
->getValue();
2208 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
2210 if (Call
&& isConstCall(Call
)) {
2211 ParameterName
= getCallParamName(Call
);
2212 } else if (UseInstructionNames
) {
2213 // If this parameter references a specific Value and this value has a name
2214 // we use this name as it is likely to be unique and more useful than just
2217 ParameterName
= Val
->getName();
2218 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
2219 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
2220 if (LoadOrigin
->hasName()) {
2221 ParameterName
+= "_loaded_from_";
2223 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
2228 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2231 isl::id Id
= isl::id::alloc(getIslCtx(), ParameterName
,
2232 const_cast<void *>((const void *)Parameter
));
2233 ParameterIds
[Parameter
] = Id
;
2236 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2237 for (const SCEV
*Parameter
: NewParameters
) {
2238 // Normalize the SCEV to get the representing element for an invariant load.
2239 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2240 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2242 if (Parameters
.insert(Parameter
))
2243 createParameterId(Parameter
);
2247 isl::id
Scop::getIdForParam(const SCEV
*Parameter
) const {
2248 // Normalize the SCEV to get the representing element for an invariant load.
2249 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2250 return ParameterIds
.lookup(Parameter
);
2253 isl::set
Scop::addNonEmptyDomainConstraints(isl::set C
) const {
2254 isl_set
*DomainContext
= isl_union_set_params(getDomains().release());
2255 return isl::manage(isl_set_intersect_params(C
.release(), DomainContext
));
2258 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2259 return DT
.dominates(BB
, getEntry());
2262 void Scop::addUserAssumptions(
2263 AssumptionCache
&AC
, DominatorTree
&DT
, LoopInfo
&LI
,
2264 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2265 for (auto &Assumption
: AC
.assumptions()) {
2266 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2267 if (!CI
|| CI
->getNumArgOperands() != 1)
2270 bool InScop
= contains(CI
);
2271 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2274 auto *L
= LI
.getLoopFor(CI
->getParent());
2275 auto *Val
= CI
->getArgOperand(0);
2276 ParameterSetTy DetectedParams
;
2277 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2279 OptimizationRemarkAnalysis(DEBUG_TYPE
, "IgnoreUserAssumption", CI
)
2280 << "Non-affine user assumption ignored.");
2284 // Collect all newly introduced parameters.
2285 ParameterSetTy NewParams
;
2286 for (auto *Param
: DetectedParams
) {
2287 Param
= extractConstantFactor(Param
, *SE
).second
;
2288 Param
= getRepresentingInvariantLoadSCEV(Param
);
2289 if (Parameters
.count(Param
))
2291 NewParams
.insert(Param
);
2294 SmallVector
<isl_set
*, 2> ConditionSets
;
2295 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2296 BasicBlock
*BB
= InScop
? CI
->getParent() : getRegion().getEntry();
2297 auto *Dom
= InScop
? DomainMap
[BB
].copy() : isl_set_copy(Context
);
2298 assert(Dom
&& "Cannot propagate a nullptr.");
2299 bool Valid
= buildConditionSets(*this, BB
, Val
, TI
, L
, Dom
,
2300 InvalidDomainMap
, ConditionSets
);
2306 isl_set
*AssumptionCtx
= nullptr;
2308 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2309 isl_set_free(ConditionSets
[0]);
2311 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2312 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2315 // Project out newly introduced parameters as they are not otherwise useful.
2316 if (!NewParams
.empty()) {
2317 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2318 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2319 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2322 if (!NewParams
.count(Param
))
2326 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2329 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "UserAssumption", CI
)
2330 << "Use user assumption: " << stringFromIslObj(AssumptionCtx
));
2331 Context
= isl_set_intersect(Context
, AssumptionCtx
);
2335 void Scop::addUserContext() {
2336 if (UserContextStr
.empty())
2339 isl_set
*UserContext
=
2340 isl_set_read_from_str(getIslCtx(), UserContextStr
.c_str());
2341 isl_space
*Space
= getParamSpace().release();
2342 if (isl_space_dim(Space
, isl_dim_param
) !=
2343 isl_set_dim(UserContext
, isl_dim_param
)) {
2344 auto SpaceStr
= isl_space_to_str(Space
);
2345 errs() << "Error: the context provided in -polly-context has not the same "
2346 << "number of dimensions than the computed context. Due to this "
2347 << "mismatch, the -polly-context option is ignored. Please provide "
2348 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2350 isl_set_free(UserContext
);
2351 isl_space_free(Space
);
2355 for (unsigned i
= 0; i
< isl_space_dim(Space
, isl_dim_param
); i
++) {
2356 auto *NameContext
= isl_set_get_dim_name(Context
, isl_dim_param
, i
);
2357 auto *NameUserContext
= isl_set_get_dim_name(UserContext
, isl_dim_param
, i
);
2359 if (strcmp(NameContext
, NameUserContext
) != 0) {
2360 auto SpaceStr
= isl_space_to_str(Space
);
2361 errs() << "Error: the name of dimension " << i
2362 << " provided in -polly-context "
2363 << "is '" << NameUserContext
<< "', but the name in the computed "
2364 << "context is '" << NameContext
2365 << "'. Due to this name mismatch, "
2366 << "the -polly-context option is ignored. Please provide "
2367 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2369 isl_set_free(UserContext
);
2370 isl_space_free(Space
);
2375 isl_set_set_dim_id(UserContext
, isl_dim_param
, i
,
2376 isl_space_get_dim_id(Space
, isl_dim_param
, i
));
2379 Context
= isl_set_intersect(Context
, UserContext
);
2380 isl_space_free(Space
);
2383 void Scop::buildInvariantEquivalenceClasses() {
2384 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2386 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2387 for (LoadInst
*LInst
: RIL
) {
2388 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2390 Type
*Ty
= LInst
->getType();
2391 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2393 InvEquivClassVMap
[LInst
] = ClassRep
;
2398 InvariantEquivClasses
.emplace_back(
2399 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2403 void Scop::buildContext() {
2404 isl_space
*Space
= isl_space_params_alloc(getIslCtx(), 0);
2405 Context
= isl_set_universe(isl_space_copy(Space
));
2406 InvalidContext
= isl_set_empty(isl_space_copy(Space
));
2407 AssumedContext
= isl_set_universe(Space
);
2410 void Scop::addParameterBounds() {
2412 for (auto *Parameter
: Parameters
) {
2413 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2415 addRangeBoundsToSet(give(Context
), SRange
, PDim
++, isl::dim::param
)
2420 static std::vector
<isl::id
> getFortranArrayIds(Scop::array_range Arrays
) {
2421 std::vector
<isl::id
> OutermostSizeIds
;
2422 for (auto Array
: Arrays
) {
2423 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2424 // for its outermost dimension. Fortran arrays will have this since the
2425 // outermost dimension size can be picked up from their runtime description.
2426 // TODO: actually need to check if it has a FAD, but for now this works.
2427 if (Array
->getNumberOfDimensions() > 0) {
2428 isl::pw_aff PwAff
= Array
->getDimensionSizePw(0);
2433 isl::manage(isl_pw_aff_get_dim_id(PwAff
.get(), isl_dim_param
, 0));
2434 assert(!Id
.is_null() &&
2435 "Invalid Id for PwAff expression in Fortran array");
2437 OutermostSizeIds
.push_back(Id
);
2440 return OutermostSizeIds
;
2443 // The FORTRAN array size parameters are known to be non-negative.
2444 static isl_set
*boundFortranArrayParams(__isl_give isl_set
*Context
,
2445 Scop::array_range Arrays
) {
2446 std::vector
<isl::id
> OutermostSizeIds
;
2447 OutermostSizeIds
= getFortranArrayIds(Arrays
);
2449 for (isl::id Id
: OutermostSizeIds
) {
2450 int dim
= isl_set_find_dim_by_id(Context
, isl_dim_param
, Id
.get());
2451 Context
= isl_set_lower_bound_si(Context
, isl_dim_param
, dim
, 0);
2457 void Scop::realignParams() {
2458 if (PollyIgnoreParamBounds
)
2461 // Add all parameters into a common model.
2462 isl::space Space
= getFullParamSpace();
2464 // Align the parameters of all data structures to the model.
2465 Context
= isl_set_align_params(Context
, Space
.copy());
2467 // Bound the size of the fortran array dimensions.
2468 Context
= boundFortranArrayParams(Context
, arrays());
2470 // As all parameters are known add bounds to them.
2471 addParameterBounds();
2473 for (ScopStmt
&Stmt
: *this)
2474 Stmt
.realignParams();
2475 // Simplify the schedule according to the context too.
2476 Schedule
= isl_schedule_gist_domain_params(Schedule
, getContext().release());
2479 static __isl_give isl_set
*
2480 simplifyAssumptionContext(__isl_take isl_set
*AssumptionContext
,
2482 // If we have modeled all blocks in the SCoP that have side effects we can
2483 // simplify the context with the constraints that are needed for anything to
2484 // be executed at all. However, if we have error blocks in the SCoP we already
2485 // assumed some parameter combinations cannot occur and removed them from the
2486 // domains, thus we cannot use the remaining domain to simplify the
2488 if (!S
.hasErrorBlock()) {
2489 isl_set
*DomainParameters
= isl_union_set_params(S
.getDomains().release());
2491 isl_set_gist_params(AssumptionContext
, DomainParameters
);
2495 isl_set_gist_params(AssumptionContext
, S
.getContext().release());
2496 return AssumptionContext
;
2499 void Scop::simplifyContexts() {
2500 // The parameter constraints of the iteration domains give us a set of
2501 // constraints that need to hold for all cases where at least a single
2502 // statement iteration is executed in the whole scop. We now simplify the
2503 // assumed context under the assumption that such constraints hold and at
2504 // least a single statement iteration is executed. For cases where no
2505 // statement instances are executed, the assumptions we have taken about
2506 // the executed code do not matter and can be changed.
2508 // WARNING: This only holds if the assumptions we have taken do not reduce
2509 // the set of statement instances that are executed. Otherwise we
2510 // may run into a case where the iteration domains suggest that
2511 // for a certain set of parameter constraints no code is executed,
2512 // but in the original program some computation would have been
2513 // performed. In such a case, modifying the run-time conditions and
2514 // possibly influencing the run-time check may cause certain scops
2515 // to not be executed.
2519 // When delinearizing the following code:
2521 // for (long i = 0; i < 100; i++)
2522 // for (long j = 0; j < m; j++)
2525 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2526 // otherwise we would access out of bound data. Now, knowing that code is
2527 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2528 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2530 isl_set_align_params(InvalidContext
, getParamSpace().release());
2533 /// Add the minimal/maximal access in @p Set to @p User.
2535 buildMinMaxAccess(isl::set Set
, Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2536 isl::pw_multi_aff MinPMA
, MaxPMA
;
2537 isl::pw_aff LastDimAff
;
2540 isl::ctx Ctx
= Set
.get_ctx();
2542 Set
= Set
.remove_divs();
2544 if (isl_set_n_basic_set(Set
.get()) >= MaxDisjunctsInDomain
)
2545 return isl::stat::error
;
2547 // Restrict the number of parameters involved in the access as the lexmin/
2548 // lexmax computation will take too long if this number is high.
2550 // Experiments with a simple test case using an i7 4800MQ:
2552 // #Parameters involved | Time (in sec)
2561 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2562 unsigned InvolvedParams
= 0;
2563 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2564 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2567 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2568 return isl::stat::error
;
2571 if (isl_set_n_basic_set(Set
.get()) > RunTimeChecksMaxAccessDisjuncts
)
2572 return isl::stat::error
;
2574 MinPMA
= Set
.lexmin_pw_multi_aff();
2575 MaxPMA
= Set
.lexmax_pw_multi_aff();
2577 if (isl_ctx_last_error(Ctx
.get()) == isl_error_quota
)
2578 return isl::stat::error
;
2580 MinPMA
= MinPMA
.coalesce();
2581 MaxPMA
= MaxPMA
.coalesce();
2583 // Adjust the last dimension of the maximal access by one as we want to
2584 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2585 // we test during code generation might now point after the end of the
2586 // allocated array but we will never dereference it anyway.
2587 assert(MaxPMA
.dim(isl::dim::out
) && "Assumed at least one output dimension");
2588 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2589 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2590 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2591 OneAff
= OneAff
.add_constant_si(1);
2592 LastDimAff
= LastDimAff
.add(OneAff
);
2593 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2595 MinMaxAccesses
.push_back(std::make_pair(MinPMA
.copy(), MaxPMA
.copy()));
2597 return isl::stat::ok
;
2600 static __isl_give isl_set
*getAccessDomain(MemoryAccess
*MA
) {
2601 isl_set
*Domain
= MA
->getStatement()->getDomain().release();
2602 Domain
= isl_set_project_out(Domain
, isl_dim_set
, 0, isl_set_n_dim(Domain
));
2603 return isl_set_reset_tuple_id(Domain
);
2606 /// Wrapper function to calculate minimal/maximal accesses to each array.
2607 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2608 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2609 MinMaxAccesses
.reserve(AliasGroup
.size());
2611 isl::union_set Domains
= S
.getDomains();
2612 isl::union_map Accesses
= isl::union_map::empty(S
.getParamSpace());
2614 for (MemoryAccess
*MA
: AliasGroup
)
2615 Accesses
= Accesses
.add_map(give(MA
->getAccessRelation().release()));
2617 Accesses
= Accesses
.intersect_domain(Domains
);
2618 isl::union_set Locations
= Accesses
.range();
2619 Locations
= Locations
.coalesce();
2620 Locations
= Locations
.detect_equalities();
2622 auto Lambda
= [&MinMaxAccesses
, &S
](isl::set Set
) -> isl::stat
{
2623 return buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2625 return Locations
.foreach_set(Lambda
) == isl::stat::ok
;
2628 /// Helper to treat non-affine regions and basic blocks the same.
2632 /// Return the block that is the representing block for @p RN.
2633 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2634 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2635 : RN
->getNodeAs
<BasicBlock
>();
2638 /// Return the @p idx'th block that is executed after @p RN.
2639 static inline BasicBlock
*
2640 getRegionNodeSuccessor(RegionNode
*RN
, TerminatorInst
*TI
, unsigned idx
) {
2641 if (RN
->isSubRegion()) {
2643 return RN
->getNodeAs
<Region
>()->getExit();
2645 return TI
->getSuccessor(idx
);
2648 /// Return the smallest loop surrounding @p RN.
2649 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2650 if (!RN
->isSubRegion()) {
2651 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2652 Loop
*L
= LI
.getLoopFor(BB
);
2654 // Unreachable statements are not considered to belong to a LLVM loop, as
2655 // they are not part of an actual loop in the control flow graph.
2656 // Nevertheless, we handle certain unreachable statements that are common
2657 // when modeling run-time bounds checks as being part of the loop to be
2658 // able to model them and to later eliminate the run-time bounds checks.
2660 // Specifically, for basic blocks that terminate in an unreachable and
2661 // where the immediate predecessor is part of a loop, we assume these
2662 // basic blocks belong to the loop the predecessor belongs to. This
2663 // allows us to model the following code.
2665 // for (i = 0; i < N; i++) {
2667 // abort(); <- this abort might be translated to an
2672 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2673 L
= LI
.getLoopFor(BB
->getPrevNode());
2677 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2678 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2679 while (L
&& NonAffineSubRegion
->contains(L
))
2680 L
= L
->getParentLoop();
2684 /// Get the number of blocks in @p L.
2686 /// The number of blocks in a loop are the number of basic blocks actually
2687 /// belonging to the loop, as well as all single basic blocks that the loop
2688 /// exits to and which terminate in an unreachable instruction. We do not
2689 /// allow such basic blocks in the exit of a scop, hence they belong to the
2690 /// scop and represent run-time conditions which we want to model and
2691 /// subsequently speculate away.
2693 /// @see getRegionNodeLoop for additional details.
2694 unsigned getNumBlocksInLoop(Loop
*L
) {
2695 unsigned NumBlocks
= L
->getNumBlocks();
2696 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
2697 L
->getExitBlocks(ExitBlocks
);
2699 for (auto ExitBlock
: ExitBlocks
) {
2700 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
2706 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2707 if (!RN
->isSubRegion())
2710 Region
*R
= RN
->getNodeAs
<Region
>();
2711 return std::distance(R
->block_begin(), R
->block_end());
2714 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2715 const DominatorTree
&DT
) {
2716 if (!RN
->isSubRegion())
2717 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2718 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2719 if (isErrorBlock(*BB
, R
, LI
, DT
))
2726 static inline __isl_give isl_set
*addDomainDimId(__isl_take isl_set
*Domain
,
2727 unsigned Dim
, Loop
*L
) {
2728 Domain
= isl_set_lower_bound_si(Domain
, isl_dim_set
, Dim
, -1);
2730 isl_id_alloc(isl_set_get_ctx(Domain
), nullptr, static_cast<void *>(L
));
2731 return isl_set_set_dim_id(Domain
, isl_dim_set
, Dim
, DimId
);
2734 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2735 return getDomainConditions(Stmt
->getEntryBlock());
2738 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2739 auto DIt
= DomainMap
.find(BB
);
2740 if (DIt
!= DomainMap
.end())
2741 return DIt
->getSecond();
2743 auto &RI
= *R
.getRegionInfo();
2744 auto *BBR
= RI
.getRegionFor(BB
);
2745 while (BBR
->getEntry() == BB
)
2746 BBR
= BBR
->getParent();
2747 return getDomainConditions(BBR
->getEntry());
2750 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2751 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2752 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2753 auto *EntryBB
= R
->getEntry();
2754 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2755 int LD
= getRelativeLoopDepth(L
);
2756 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD
+ 1));
2759 S
= addDomainDimId(S
, LD
+ 1, L
);
2760 L
= L
->getParentLoop();
2763 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2764 DomainMap
[EntryBB
] = isl::manage(S
);
2766 if (IsOnlyNonAffineRegion
)
2767 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2769 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2772 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2775 // Error blocks and blocks dominated by them have been assumed to never be
2776 // executed. Representing them in the Scop does not add any value. In fact,
2777 // it is likely to cause issues during construction of the ScopStmts. The
2778 // contents of error blocks have not been verified to be expressible and
2779 // will cause problems when building up a ScopStmt for them.
2780 // Furthermore, basic blocks dominated by error blocks may reference
2781 // instructions in the error block which, if the error block is not modeled,
2782 // can themselves not be constructed properly. To this end we will replace
2783 // the domains of error blocks and those only reachable via error blocks
2784 // with an empty set. Additionally, we will record for each block under which
2785 // parameter combination it would be reached via an error block in its
2786 // InvalidDomain. This information is needed during load hoisting.
2787 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
2793 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2794 /// to be compatible to domains constructed for loop @p NewL.
2796 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2797 /// edge from @p OldL to @p NewL.
2798 static __isl_give isl_set
*adjustDomainDimensions(Scop
&S
,
2799 __isl_take isl_set
*Dom
,
2800 Loop
*OldL
, Loop
*NewL
) {
2801 // If the loops are the same there is nothing to do.
2805 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2806 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2807 // If both loops are non-affine loops there is nothing to do.
2808 if (OldDepth
== -1 && NewDepth
== -1)
2811 // Distinguish three cases:
2812 // 1) The depth is the same but the loops are not.
2813 // => One loop was left one was entered.
2814 // 2) The depth increased from OldL to NewL.
2815 // => One loop was entered, none was left.
2816 // 3) The depth decreased from OldL to NewL.
2817 // => Loops were left were difference of the depths defines how many.
2818 if (OldDepth
== NewDepth
) {
2819 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2820 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NewDepth
, 1);
2821 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2822 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2823 } else if (OldDepth
< NewDepth
) {
2824 assert(OldDepth
+ 1 == NewDepth
);
2825 auto &R
= S
.getRegion();
2827 assert(NewL
->getParentLoop() == OldL
||
2828 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2829 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2830 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2832 assert(OldDepth
> NewDepth
);
2833 int Diff
= OldDepth
- NewDepth
;
2834 int NumDim
= isl_set_n_dim(Dom
);
2835 assert(NumDim
>= Diff
);
2836 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NumDim
- Diff
, Diff
);
2842 bool Scop::propagateInvalidStmtDomains(
2843 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2844 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2845 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2846 for (auto *RN
: RTraversal
) {
2848 // Recurse for affine subregions but go on for basic blocks and non-affine
2850 if (RN
->isSubRegion()) {
2851 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2852 if (!isNonAffineSubRegion(SubRegion
)) {
2853 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2858 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2859 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2860 isl::set
&Domain
= DomainMap
[BB
];
2861 assert(Domain
&& "Cannot propagate a nullptr");
2863 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2865 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2867 if (!IsInvalidBlock
) {
2868 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2870 InvalidDomain
= Domain
;
2871 isl::set DomPar
= Domain
.params();
2872 recordAssumption(ERRORBLOCK
, DomPar
.release(),
2873 BB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
2877 if (InvalidDomain
.is_empty()) {
2878 InvalidDomainMap
[BB
] = InvalidDomain
;
2882 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2883 auto *TI
= BB
->getTerminator();
2884 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2885 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2886 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2888 // Skip successors outside the SCoP.
2889 if (!contains(SuccBB
))
2893 if (DT
.dominates(SuccBB
, BB
))
2896 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2898 auto *AdjustedInvalidDomain
= adjustDomainDimensions(
2899 *this, InvalidDomain
.copy(), BBLoop
, SuccBBLoop
);
2901 auto *SuccInvalidDomain
= InvalidDomainMap
[SuccBB
].copy();
2903 isl_set_union(SuccInvalidDomain
, AdjustedInvalidDomain
);
2904 SuccInvalidDomain
= isl_set_coalesce(SuccInvalidDomain
);
2905 unsigned NumConjucts
= isl_set_n_basic_set(SuccInvalidDomain
);
2907 InvalidDomainMap
[SuccBB
] = isl::manage(SuccInvalidDomain
);
2909 // Check if the maximal number of domain disjunctions was reached.
2910 // In case this happens we will bail.
2911 if (NumConjucts
< MaxDisjunctsInDomain
)
2914 InvalidDomainMap
.erase(BB
);
2915 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2919 InvalidDomainMap
[BB
] = InvalidDomain
;
2925 void Scop::propagateDomainConstraintsToRegionExit(
2926 BasicBlock
*BB
, Loop
*BBLoop
,
2927 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2928 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2929 // Check if the block @p BB is the entry of a region. If so we propagate it's
2930 // domain to the exit block of the region. Otherwise we are done.
2931 auto *RI
= R
.getRegionInfo();
2932 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2933 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2934 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2937 // Do not propagate the domain if there is a loop backedge inside the region
2938 // that would prevent the exit block from being executed.
2940 while (L
&& contains(L
)) {
2941 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2942 BBLoop
->getLoopLatches(LatchBBs
);
2943 for (auto *LatchBB
: LatchBBs
)
2944 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2946 L
= L
->getParentLoop();
2949 isl::set Domain
= DomainMap
[BB
];
2950 assert(Domain
&& "Cannot propagate a nullptr");
2952 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2954 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2955 // adjust the domain before we can propagate it.
2956 isl::set AdjustedDomain
= isl::manage(
2957 adjustDomainDimensions(*this, Domain
.copy(), BBLoop
, ExitBBLoop
));
2958 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2960 // If the exit domain is not yet created we set it otherwise we "add" the
2962 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2964 // Initialize the invalid domain.
2965 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2967 FinishedExitBlocks
.insert(ExitBB
);
2970 bool Scop::buildDomainsWithBranchConstraints(
2971 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2972 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2973 // To create the domain for each block in R we iterate over all blocks and
2974 // subregions in R and propagate the conditions under which the current region
2975 // element is executed. To this end we iterate in reverse post order over R as
2976 // it ensures that we first visit all predecessors of a region node (either a
2977 // basic block or a subregion) before we visit the region node itself.
2978 // Initially, only the domain for the SCoP region entry block is set and from
2979 // there we propagate the current domain to all successors, however we add the
2980 // condition that the successor is actually executed next.
2981 // As we are only interested in non-loop carried constraints here we can
2982 // simply skip loop back edges.
2984 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2985 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2986 for (auto *RN
: RTraversal
) {
2987 // Recurse for affine subregions but go on for basic blocks and non-affine
2989 if (RN
->isSubRegion()) {
2990 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2991 if (!isNonAffineSubRegion(SubRegion
)) {
2992 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
2999 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
3000 HasErrorBlock
= true;
3002 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
3003 TerminatorInst
*TI
= BB
->getTerminator();
3005 if (isa
<UnreachableInst
>(TI
))
3008 isl::set Domain
= DomainMap
.lookup(BB
);
3011 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
3013 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
3014 // Propagate the domain from BB directly to blocks that have a superset
3015 // domain, at the moment only region exit nodes of regions that start in BB.
3016 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
3019 // If all successors of BB have been set a domain through the propagation
3020 // above we do not need to build condition sets but can just skip this
3021 // block. However, it is important to note that this is a local property
3022 // with regards to the region @p R. To this end FinishedExitBlocks is a
3024 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
3025 return FinishedExitBlocks
.count(SuccBB
);
3027 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
3030 // Build the condition sets for the successor nodes of the current region
3031 // node. If it is a non-affine subregion we will always execute the single
3032 // exit node, hence the single entry node domain is the condition set. For
3033 // basic blocks we use the helper function buildConditionSets.
3034 SmallVector
<isl_set
*, 8> ConditionSets
;
3035 if (RN
->isSubRegion())
3036 ConditionSets
.push_back(Domain
.copy());
3037 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
3038 InvalidDomainMap
, ConditionSets
))
3041 // Now iterate over the successors and set their initial domain based on
3042 // their condition set. We skip back edges here and have to be careful when
3043 // we leave a loop not to keep constraints over a dimension that doesn't
3045 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
3046 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
3047 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
3048 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
3050 // Skip blocks outside the region.
3051 if (!contains(SuccBB
))
3054 // If we propagate the domain of some block to "SuccBB" we do not have to
3055 // adjust the domain.
3056 if (FinishedExitBlocks
.count(SuccBB
))
3060 if (DT
.dominates(SuccBB
, BB
))
3063 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
3065 CondSet
= isl::manage(
3066 adjustDomainDimensions(*this, CondSet
.copy(), BBLoop
, SuccBBLoop
));
3068 // Set the domain for the successor or merge it with an existing domain in
3069 // case there are multiple paths (without loop back edges) to the
3071 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
3074 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
3076 // Initialize the invalid domain.
3077 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
3078 SuccDomain
= CondSet
;
3081 SuccDomain
= SuccDomain
.detect_equalities();
3083 // Check if the maximal number of domain disjunctions was reached.
3084 // In case this happens we will clean up and bail.
3085 if (isl_set_n_basic_set(SuccDomain
.get()) < MaxDisjunctsInDomain
)
3088 invalidate(COMPLEXITY
, DebugLoc());
3089 while (++u
< ConditionSets
.size())
3090 isl_set_free(ConditionSets
[u
]);
3098 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
3101 // If @p BB is the ScopEntry we are done
3102 if (R
.getEntry() == BB
)
3103 return isl::set::universe(Domain
.get_space());
3105 // The region info of this function.
3106 auto &RI
= *R
.getRegionInfo();
3108 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
3110 // A domain to collect all predecessor domains, thus all conditions under
3111 // which the block is executed. To this end we start with the empty domain.
3112 isl::set PredDom
= isl::set::empty(Domain
.get_space());
3114 // Set of regions of which the entry block domain has been propagated to BB.
3115 // all predecessors inside any of the regions can be skipped.
3116 SmallSet
<Region
*, 8> PropagatedRegions
;
3118 for (auto *PredBB
: predecessors(BB
)) {
3120 if (DT
.dominates(BB
, PredBB
))
3123 // If the predecessor is in a region we used for propagation we can skip it.
3124 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
3125 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
3130 // Check if there is a valid region we can use for propagation, thus look
3131 // for a region that contains the predecessor and has @p BB as exit block.
3132 auto *PredR
= RI
.getRegionFor(PredBB
);
3133 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
3136 // If a valid region for propagation was found use the entry of that region
3137 // for propagation, otherwise the PredBB directly.
3138 if (PredR
->getExit() == BB
) {
3139 PredBB
= PredR
->getEntry();
3140 PropagatedRegions
.insert(PredR
);
3143 auto *PredBBDom
= getDomainConditions(PredBB
).release();
3144 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
3146 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
3148 PredDom
= PredDom
.unite(isl::manage(PredBBDom
));
3154 bool Scop::propagateDomainConstraints(
3155 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
3156 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
3157 // Iterate over the region R and propagate the domain constrains from the
3158 // predecessors to the current node. In contrast to the
3159 // buildDomainsWithBranchConstraints function, this one will pull the domain
3160 // information from the predecessors instead of pushing it to the successors.
3161 // Additionally, we assume the domains to be already present in the domain
3162 // map here. However, we iterate again in reverse post order so we know all
3163 // predecessors have been visited before a block or non-affine subregion is
3166 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
3167 for (auto *RN
: RTraversal
) {
3168 // Recurse for affine subregions but go on for basic blocks and non-affine
3170 if (RN
->isSubRegion()) {
3171 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
3172 if (!isNonAffineSubRegion(SubRegion
)) {
3173 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
3179 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
3180 isl::set
&Domain
= DomainMap
[BB
];
3183 // Under the union of all predecessor conditions we can reach this block.
3184 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
3185 Domain
= Domain
.intersect(PredDom
).coalesce();
3186 Domain
= Domain
.align_params(getParamSpace());
3188 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
3189 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
3190 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
3197 /// Create a map to map from a given iteration to a subsequent iteration.
3199 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3200 /// is incremented by one and all other dimensions are equal, e.g.,
3201 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3203 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3204 static __isl_give isl_map
*
3205 createNextIterationMap(__isl_take isl_space
*SetSpace
, unsigned Dim
) {
3206 auto *MapSpace
= isl_space_map_from_set(SetSpace
);
3207 auto *NextIterationMap
= isl_map_universe(isl_space_copy(MapSpace
));
3208 for (unsigned u
= 0; u
< isl_map_dim(NextIterationMap
, isl_dim_in
); u
++)
3211 isl_map_equate(NextIterationMap
, isl_dim_in
, u
, isl_dim_out
, u
);
3212 auto *C
= isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace
));
3213 C
= isl_constraint_set_constant_si(C
, 1);
3214 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, Dim
, 1);
3215 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, Dim
, -1);
3216 NextIterationMap
= isl_map_add_constraint(NextIterationMap
, C
);
3217 return NextIterationMap
;
3220 bool Scop::addLoopBoundsToHeaderDomain(
3221 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
3222 int LoopDepth
= getRelativeLoopDepth(L
);
3223 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
3225 BasicBlock
*HeaderBB
= L
->getHeader();
3226 assert(DomainMap
.count(HeaderBB
));
3227 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
3229 isl::map NextIterationMap
= isl::manage(
3230 createNextIterationMap(HeaderBBDom
.get_space().release(), LoopDepth
));
3232 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
3234 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
3235 L
->getLoopLatches(LatchBlocks
);
3237 for (BasicBlock
*LatchBB
: LatchBlocks
) {
3238 // If the latch is only reachable via error statements we skip it.
3239 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
3243 isl::set BackedgeCondition
= nullptr;
3245 TerminatorInst
*TI
= LatchBB
->getTerminator();
3246 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
3247 assert(BI
&& "Only branch instructions allowed in loop latches");
3249 if (BI
->isUnconditional())
3250 BackedgeCondition
= LatchBBDom
;
3252 SmallVector
<isl_set
*, 8> ConditionSets
;
3253 int idx
= BI
->getSuccessor(0) != HeaderBB
;
3254 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
3255 InvalidDomainMap
, ConditionSets
))
3258 // Free the non back edge condition set as we do not need it.
3259 isl_set_free(ConditionSets
[1 - idx
]);
3261 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
3264 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3265 assert(LatchLoopDepth
>= LoopDepth
);
3266 BackedgeCondition
= BackedgeCondition
.project_out(
3267 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3268 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
3271 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3272 for (int i
= 0; i
< LoopDepth
; i
++)
3273 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3275 isl::set UnionBackedgeConditionComplement
=
3276 UnionBackedgeCondition
.complement();
3277 UnionBackedgeConditionComplement
=
3278 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3280 UnionBackedgeConditionComplement
=
3281 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3282 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3283 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
3285 auto Parts
= partitionSetParts(HeaderBBDom
.copy(), LoopDepth
);
3286 HeaderBBDom
= isl::manage(Parts
.second
);
3288 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3289 // the bounded assumptions to the context as they are already implied by the
3291 if (Affinator
.hasNSWAddRecForLoop(L
)) {
3292 isl_set_free(Parts
.first
);
3296 isl_set
*UnboundedCtx
= isl_set_params(Parts
.first
);
3297 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3298 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
3302 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3303 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3305 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3306 if (!PointerBaseInst
)
3309 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3313 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
3316 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3317 isl::union_map Writes
) {
3318 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3319 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3322 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3323 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3324 if (!isa
<LoadInst
>(BasePtrInst
))
3325 return contains(BasePtrInst
);
3330 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3331 if (!PollyUseRuntimeAliasChecks
)
3334 if (buildAliasGroups(AA
)) {
3335 // Aliasing assumptions do not go through addAssumption but we still want to
3336 // collect statistics so we do it here explicitly.
3337 if (MinMaxAliasGroups
.size())
3338 AssumptionsAliasing
++;
3342 // If a problem occurs while building the alias groups we need to delete
3343 // this SCoP and pretend it wasn't valid in the first place. To this end
3344 // we make the assumed context infeasible.
3345 invalidate(ALIASING
, DebugLoc());
3347 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3348 << " could not be created as the number of parameters involved "
3349 "is too high. The SCoP will be "
3350 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3351 "the maximal number of parameters but be advised that the "
3352 "compile time might increase exponentially.\n\n");
3356 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3357 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3358 AliasSetTracker
AST(AA
);
3360 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3361 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3362 for (ScopStmt
&Stmt
: *this) {
3364 isl_set
*StmtDomain
= Stmt
.getDomain().release();
3365 bool StmtDomainEmpty
= isl_set_is_empty(StmtDomain
);
3366 isl_set_free(StmtDomain
);
3368 // Statements with an empty domain will never be executed.
3369 if (StmtDomainEmpty
)
3372 for (MemoryAccess
*MA
: Stmt
) {
3373 if (MA
->isScalarKind())
3376 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3377 MemAccInst
Acc(MA
->getAccessInstruction());
3378 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3379 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3381 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
3386 AliasGroupVectorTy AliasGroups
;
3387 for (AliasSet
&AS
: AST
) {
3388 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3392 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3395 AliasGroups
.push_back(std::move(AG
));
3398 return std::make_tuple(AliasGroups
, HasWriteAccess
);
3401 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3402 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3404 AliasGroupTy
&AG
= AliasGroups
[u
];
3405 AliasGroupTy::iterator AGI
= AG
.begin();
3406 isl_set
*AGDomain
= getAccessDomain(*AGI
);
3407 while (AGI
!= AG
.end()) {
3408 MemoryAccess
*MA
= *AGI
;
3409 isl_set
*MADomain
= getAccessDomain(MA
);
3410 if (isl_set_is_disjoint(AGDomain
, MADomain
)) {
3411 NewAG
.push_back(MA
);
3412 AGI
= AG
.erase(AGI
);
3413 isl_set_free(MADomain
);
3415 AGDomain
= isl_set_union(AGDomain
, MADomain
);
3419 if (NewAG
.size() > 1)
3420 AliasGroups
.push_back(std::move(NewAG
));
3421 isl_set_free(AGDomain
);
3425 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3426 // To create sound alias checks we perform the following steps:
3427 // o) We partition each group into read only and non read only accesses.
3428 // o) For each group with more than one base pointer we then compute minimal
3429 // and maximal accesses to each array of a group in read only and non
3430 // read only partitions separately.
3431 AliasGroupVectorTy AliasGroups
;
3432 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3434 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3436 splitAliasGroupsByDomain(AliasGroups
);
3438 for (AliasGroupTy
&AG
: AliasGroups
) {
3439 if (!hasFeasibleRuntimeContext())
3443 IslMaxOperationsGuard
MaxOpGuard(getIslCtx(), OptComputeOut
);
3444 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3448 if (isl_ctx_last_error(getIslCtx()) == isl_error_quota
) {
3449 invalidate(COMPLEXITY
, DebugLoc());
3457 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3458 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3459 AliasGroupTy ReadOnlyAccesses
;
3460 AliasGroupTy ReadWriteAccesses
;
3461 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3462 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3464 if (AliasGroup
.size() < 2)
3467 for (MemoryAccess
*Access
: AliasGroup
) {
3468 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3469 Access
->getAccessInstruction())
3470 << "Possibly aliasing pointer, use restrict keyword.");
3471 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3472 if (HasWriteAccess
.count(Array
)) {
3473 ReadWriteArrays
.insert(Array
);
3474 ReadWriteAccesses
.push_back(Access
);
3476 ReadOnlyArrays
.insert(Array
);
3477 ReadOnlyAccesses
.push_back(Access
);
3481 // If there are no read-only pointers, and less than two read-write pointers,
3482 // no alias check is needed.
3483 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3486 // If there is no read-write pointer, no alias check is needed.
3487 if (ReadWriteArrays
.empty())
3490 // For non-affine accesses, no alias check can be generated as we cannot
3491 // compute a sufficiently tight lower and upper bound: bail out.
3492 for (MemoryAccess
*MA
: AliasGroup
) {
3493 if (!MA
->isAffine()) {
3494 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3495 MA
->getAccessInstruction()->getParent());
3500 // Ensure that for all memory accesses for which we generate alias checks,
3501 // their base pointers are available.
3502 for (MemoryAccess
*MA
: AliasGroup
) {
3503 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3504 addRequiredInvariantLoad(
3505 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3508 MinMaxAliasGroups
.emplace_back();
3509 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3510 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3511 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3516 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3521 // Bail out if the number of values we need to compare is too large.
3522 // This is important as the number of comparisons grows quadratically with
3523 // the number of values we need to compare.
3524 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3525 RunTimeChecksMaxArraysPerGroup
)
3529 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3537 /// Get the smallest loop that contains @p S but is not in @p S.
3538 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3539 // Start with the smallest loop containing the entry and expand that
3540 // loop until it contains all blocks in the region. If there is a loop
3541 // containing all blocks in the region check if it is itself contained
3542 // and if so take the parent loop as it will be the smallest containing
3543 // the region but not contained by it.
3544 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3546 bool AllContained
= true;
3547 for (auto *BB
: S
.blocks())
3548 AllContained
&= L
->contains(BB
);
3551 L
= L
->getParentLoop();
3554 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
3557 int Scop::NextScopID
= 0;
3559 std::string
Scop::CurrentFunc
;
3561 int Scop::getNextID(std::string ParentFunc
) {
3562 if (ParentFunc
!= CurrentFunc
) {
3563 CurrentFunc
= ParentFunc
;
3566 return NextScopID
++;
3569 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3570 ScopDetection::DetectionContext
&DC
, OptimizationRemarkEmitter
&ORE
)
3571 : SE(&ScalarEvolution
), R(R
), name(R
.getNameStr()),
3572 HasSingleExitEdge(R
.getExitingBlock()), DC(DC
), ORE(ORE
),
3573 IslCtx(isl_ctx_alloc(), isl_ctx_free
), Affinator(this, LI
),
3574 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3575 if (IslOnErrorAbort
)
3576 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT
);
3581 isl_set_free(Context
);
3582 isl_set_free(AssumedContext
);
3583 isl_set_free(InvalidContext
);
3584 isl_schedule_free(Schedule
);
3586 ParameterIds
.clear();
3588 for (auto &AS
: RecordedAssumptions
)
3589 isl_set_free(AS
.Set
);
3591 // Free the alias groups
3592 for (MinMaxVectorPairTy
&MinMaxAccessPair
: MinMaxAliasGroups
) {
3593 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.first
) {
3594 isl_pw_multi_aff_free(MMA
.first
);
3595 isl_pw_multi_aff_free(MMA
.second
);
3597 for (MinMaxAccessTy
&MMA
: MinMaxAccessPair
.second
) {
3598 isl_pw_multi_aff_free(MMA
.first
);
3599 isl_pw_multi_aff_free(MMA
.second
);
3603 for (const auto &IAClass
: InvariantEquivClasses
)
3604 isl_set_free(IAClass
.ExecutionContext
);
3606 // Explicitly release all Scop objects and the underlying isl objects before
3607 // we release the isl context.
3609 ScopArrayInfoSet
.clear();
3610 ScopArrayInfoMap
.clear();
3611 ScopArrayNameMap
.clear();
3612 AccessFunctions
.clear();
// Fold constant factors that only occur in the innermost array dimensions to
// the right, i.e. into the element size of the array. E.g. an access A[i][3j]
// into a 2D array is rewritten so that the inner dimension has stride 1 and
// the fold factor (3) is multiplied into the last dimension size.
// NOTE(review): several short lines (continue;/else/braces and the ValInt
// initializations) were lost in extraction; restored from the visible control
// flow — verify against upstream Polly history.
void Scop::foldSizeConstantsToRight() {
  isl_union_set *Accessed = isl_union_map_range(getAccesses().release());

  for (auto Array : arrays()) {
    // Nothing to fold for flat (<= 1D) arrays.
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl_space *Space = Array->getSpace().release();
    Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));

    // Skip arrays that are never accessed.
    if (!isl_union_set_contains(Accessed, Space)) {
      isl_space_free(Space);
      continue;
    }

    isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
    isl_map *Transform =
        isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));

    // Int[i] holds the divisibility factor found for dimension i (0 = fixed
    // to a single value, 1 = no fold possible).
    std::vector<int> Int;

    int Dims = isl_set_dim(Elements, isl_dim_set);
    for (int i = 0; i < Dims; i++) {
      // Project onto dimension i only.
      isl_set *DimOnly =
          isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
      DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
      DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);

      isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);

      // The innermost dimension is never folded; map it through unchanged.
      if (i == Dims - 1) {
        Int.push_back(0);
        Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
        isl_basic_set_free(DimHull);
        continue;
      }

      // A single existentially quantified dimension in the hull means all
      // accessed indices in dimension i are multiples of some denominator.
      if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
        isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
        isl_val *Val = isl_aff_get_denominator_val(Diff);
        isl_aff_free(Diff);

        int ValInt = 1;
        if (isl_val_is_int(Val))
          ValInt = isl_val_get_num_si(Val);
        isl_val_free(Val);

        Int.push_back(ValInt);

        // Add the constraint: ValInt * out_i == in_i, i.e. divide the
        // index of dimension i by the common factor.
        isl_constraint *C = isl_constraint_alloc_equality(
            isl_local_space_from_space(isl_map_get_space(Transform)));
        C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
        C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
        Transform = isl_map_add_constraint(Transform, C);
        isl_basic_set_free(DimHull);
        continue;
      }

      // Otherwise check whether dimension i is fixed to the value zero.
      isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
      ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);

      int ValInt = 1;
      if (isl_basic_set_is_equal(ZeroSet, DimHull))
        ValInt = 0;

      Int.push_back(ValInt);
      Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
      isl_basic_set_free(DimHull);
      isl_basic_set_free(ZeroSet);
    }

    // The transformation is only valid if it maps all accessed elements.
    isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
    if (!isl_set_is_subset(Elements, MappedElements)) {
      isl_set_free(Elements);
      isl_set_free(MappedElements);
      isl_map_free(Transform);
      continue;
    }

    isl_set_free(MappedElements);

    // All outer dimensions must share the same (non-zero) fold factor.
    bool CanFold = true;
    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold) {
      isl_set_free(Elements);
      isl_map_free(Transform);
      continue;
    }

    // Apply the fold to every access function that refers to this array.
    for (auto &Access : AccessFunctions)
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(Access->getAccessRelation().apply_range(
            isl::manage(isl_map_copy(Transform))));

    isl_map_free(Transform);

    // Multiply the fold factor into the innermost dimension size.
    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);

    isl_set_free(Elements);
  }
  isl_union_set_free(Accessed);
}
// Mark all arrays that are accessed through a Fortran array descriptor by
// attaching the descriptor value (FAD) to the corresponding ScopArrayInfo.
void Scop::markFortranArrays() {
  for (ScopStmt &Stmt : Stmts) {
    for (MemoryAccess *MemAcc : Stmt) {
      Value *FAD = MemAcc->getFortranArrayDescriptor();
      // Accesses without a descriptor are not Fortran-array accesses.
      if (!FAD)
        continue;

      // TODO: const_cast-ing to edit
      ScopArrayInfo *SAI =
          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
      assert(SAI && "memory access into a Fortran array does not "
                    "have an associated ScopArrayInfo");
      SAI->applyAndSetFAD(FAD);
    }
  }
}
// Run the sequence of post-processing steps over all memory accesses; the
// order matters: dimensionality must be fixed before folding, and Fortran
// arrays are marked last.
void Scop::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
  markFortranArrays();
}
// Derive a (virtual) element size for each 1D array that divides all of its
// access functions, then let every access update its dimensionality.
void Scop::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *this)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      // Halve the candidate element size until it divides the subscript.
      // NOTE(review): the loop body was lost in extraction; presumably
      // `DivisibleSize /= 2;` — confirm against upstream.
      while (!isDivisible(Subscript, DivisibleSize, *SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
3790 void Scop::foldAccessRelations() {
3791 for (auto &Stmt
: *this)
3792 for (auto &Access
: Stmt
)
3793 Access
->foldAccessRelation();
3796 void Scop::assumeNoOutOfBounds() {
3797 for (auto &Stmt
: *this)
3798 for (auto &Access
: Stmt
)
3799 Access
->assumeNoOutOfBound();
// Remove all lookup-table entries (block -> statement, instruction ->
// statement) that refer to @p Stmt, for both region and block statements.
// NOTE(review): the region-stmt branch's `StmtMap.erase(BB);` and the `else`
// were lost in extraction; restored from the visible structure.
void Scop::removeFromStmtMap(ScopStmt &Stmt) {
  if (Stmt.isRegionStmt())
    for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
      StmtMap.erase(BB);
      for (Instruction &Inst : *BB)
        InstStmtMap.erase(&Inst);
    }
  else {
    StmtMap.erase(Stmt.getBasicBlock());
    for (Instruction *Inst : Stmt.getInstructions())
      InstStmtMap.erase(Inst);
  }
}
// Erase every statement for which @p ShouldDelete returns true, keeping the
// statement maps consistent. Iterator-based loop because erase() invalidates
// the current iterator and returns the next one.
void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
  for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
    if (!ShouldDelete(*StmtIt)) {
      StmtIt++;
      continue;
    }

    removeFromStmtMap(*StmtIt);
    StmtIt = Stmts.erase(StmtIt);
  }
}
// Remove statements whose entry block has no domain recorded in DomainMap.
void Scop::removeStmtNotInDomainMap() {
  auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
    return !this->DomainMap.lookup(Stmt.getEntryBlock());
  };
  removeStmts(ShouldDelete);
}
// Drop trivially removable statements: empty statements always, and (after
// invariant load hoisting) statements that only read memory.
void Scop::simplifySCoP(bool AfterHoisting) {
  auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
    bool RemoveStmt = Stmt.isEmpty();

    // Remove read only statements only after invariant load hoisting.
    if (!RemoveStmt && AfterHoisting) {
      bool OnlyRead = true;
      for (MemoryAccess *MA : Stmt) {
        // NOTE(review): loop body lost in extraction; presumably skips reads
        // and clears OnlyRead on the first write — confirm against upstream.
        if (MA->isRead())
          continue;

        OnlyRead = false;
        break;
      }

      RemoveStmt = OnlyRead;
    }
    return RemoveStmt;
  };
  removeStmts(ShouldDelete);
}
// Find the invariant-load equivalence class that contains @p Val, or nullptr
// if @p Val is not a hoisted load. Classes are matched by the pointer SCEV
// and the loaded type, then by the actual access instruction.
InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
  if (!LInst)
    return nullptr;

  // Loads coalesced into a representative load are looked up via that
  // representative.
  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
    LInst = cast<LoadInst>(Rep);

  Type *Ty = LInst->getType();
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
  for (auto &IAClass : InvariantEquivClasses) {
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
      continue;

    auto &MAs = IAClass.InvariantAccesses;
    for (auto *MA : MAs)
      if (MA->getAccessInstruction() == Val)
        return &IAClass;
  }

  return nullptr;
}
3881 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3882 for (const llvm::Argument
&Arg
: F
.args())
3883 if (&Arg
== maybeParam
)
// Decide whether the load behind @p MA may be hoisted unconditionally (i.e.
// speculatively executed before the SCoP), based on dereferenceability and
// how precisely the surrounding statement is modeled.
bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                              bool MAInvalidCtxIsEmpty,
                              bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  // Optionally trust that all function parameters are dereferenceable.
  if (PollyAllowDereferenceOfAllFunctionParams &&
      isAParameter(LInst->getPointerOperand(), getFunction()))
    return true;

  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
                                          LInst->getAlignment(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if it
  // does not involve any parameters that might have been specialized by the
  // statement domain.
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
      return false;
  return true;
}
// Move the hoisted (invariant) accesses of @p Stmt into the SCoP-level
// InvariantEquivClasses, consolidating accesses to the same location and
// computing the execution context under which each load must be preloaded.
// NOTE(review): several short lines (early return, else-branches, push_front,
// break) were lost in extraction; restored from the visible structure —
// verify against upstream Polly history.
void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  // Bail out (invalidate the SCoP) if the execution context is too complex.
  if (isl_set_n_basic_set(DomainCtx.get()) >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement. Otherwise
  // we could have cyclic dependences on the constraints under which the
  // hoisted loads are executed and we could not determine an order in which to
  // pre-load them. This happens because not only lower bounds are part of the
  // domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE->isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : Parameters) {
        Values.clear();
        findValues(Parameter, *SE, Values);
        if (!Values.count(AccInst))
          continue;

        if (isl::id ParamId = getIdForParam(Parameter)) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : InvariantEquivClasses) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct part of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = isl::manage(IAClass.ExecutionContext);
      if (IAClassDomainCtx)
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx.release();
      break;
    }

    if (Consolidated)
      continue;

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    InvariantEquivClasses.emplace_back(InvariantEquivClassTy{
        PointerSCEV, MemoryAccessList{MA}, MACtx.release(), Ty});
  }
}
4040 /// Check if an access range is too complex.
4042 /// An access range is too complex, if it contains either many disjuncts or
4043 /// very complex expressions. As a simple heuristic, we assume if a set to
4044 /// be too complex if the sum of existentially quantified dimensions and
4045 /// set dimensions is larger than a threshold. This reliably detects both
4046 /// sets with many disjuncts as well as sets with many divisions as they
4049 /// @param AccessRange The range to check for complexity.
4051 /// @returns True if the access range is too complex.
4052 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
4053 unsigned NumTotalDims
= 0;
4055 auto CountDimensions
= [&NumTotalDims
](isl::basic_set BSet
) -> isl::stat
{
4056 NumTotalDims
+= BSet
.dim(isl::dim::div
);
4057 NumTotalDims
+= BSet
.dim(isl::dim::set
);
4058 return isl::stat::ok
;
4061 AccessRange
.foreach_basic_set(CountDimensions
);
4063 if (NumTotalDims
> MaxDimensionsInAccessRange
)
// Compute the context (set of parameter values) under which @p Access may NOT
// be hoisted: the parameter context under which its location is written.
// Returns a null isl::set if the access cannot be hoisted at all.
// NOTE(review): the early `return nullptr;`/`return WrittenCtx;` lines and
// else-branches were lost in extraction; restored from the visible structure.
isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our code
  //       generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return nullptr;

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happened e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that it is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return nullptr;

  isl::map AccessRelation = give(Access->getAccessRelation().release());
  assert(!AccessRelation.is_empty());

  // A loop-carried access (one that depends on the iterators) is not
  // invariant.
  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
    return nullptr;

  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  isl::set SafeToLoad;

  auto &DL = getFunction().getParent()->getDataLayout();
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
                                  DL)) {
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    return nullptr;
  } else {
    SafeToLoad = AccessRelation.range();
  }

  if (isAccessRangeTooComplex(AccessRelation.range()))
    return nullptr;

  // The non-hoistable context is where some write may touch the loaded
  // location.
  isl::union_map Written = Writes.intersect_range(SafeToLoad);
  isl::set WrittenCtx = Written.params();
  bool IsWritten = !WrittenCtx.is_empty();

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = WrittenCtx.remove_divs();
  bool TooComplex =
      isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI))
    return nullptr;

  addAssumption(INVARIANTLOAD, WrittenCtx.copy(), LI->getDebugLoc(),
                AS_RESTRICTION, LI->getParent());

  return WrittenCtx;
}
// Verify that no required invariant load still has an array access inside
// the SCoP; if one does, the SCoP cannot be optimized and is invalidated.
void Scop::verifyInvariantLoads() {
  auto &RIL = getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : Stmts)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}
// Hoist invariant loads: collect, per statement, each load whose
// non-hoistable context could be computed, remove it from the statement and
// register it as a SCoP-level invariant access.
void Scop::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = getWrites();
  for (ScopStmt &Stmt : *this) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt)
      if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
        InvariantAccesses.push_back({Access, NHCtx});

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}
/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// memory access. Returns nullptr if no access has an associated array.
static const ScopArrayInfo *findCanonicalArray(Scop *S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}
/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : *S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      // Retarget the access relation's output tuple to the new array.
      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}
// For each invariant-load equivalence class, pick one canonical array and
// redirect all compatible arrays based on the class's loads to it, so that
// dynamically-loaded base pointers share a single ScopArrayInfo.
void Scop::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(this, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // we currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
        continue;

      replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
// Look up (or lazily create) the ScopArrayInfo for @p BasePtr / @p BaseName
// of kind @p Kind. On reuse, the element type and sizes are updated; a size
// mismatch invalidates the SCoP (run-time context becomes false).
// NOTE(review): the `MemoryKind Kind` parameter line and the if/else around
// creation vs. update were lost in extraction; restored from the visible
// structure.
ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
                                              ArrayRef<const SCEV *> Sizes,
                                              MemoryKind Kind,
                                              const char *BaseName) {
  assert((BasePtr || BaseName) &&
         "BasePtr and BaseName can not be nullptr at the same time.");
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
  // Arrays are keyed either by (base pointer, kind) or by name.
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
                      : ScopArrayNameMap[BaseName];
  if (!SAI) {
    auto &DL = getFunction().getParent()->getDataLayout();
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
                                DL, this, BaseName));
    ScopArrayInfoSet.insert(SAI.get());
  } else {
    SAI->updateElementType(ElementType);
    // In case of mismatching array sizes, we bail out by setting the run-time
    // context to false.
    if (!SAI->updateSizes(Sizes))
      invalidate(DELINEARIZATION, DebugLoc());
  }
  return SAI.get();
}
// Create a named array of kind MemoryKind::Array with constant i64 dimension
// sizes; a zero size entry becomes a nullptr (unknown/unsized dimension).
ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
                                         const std::string &BaseName,
                                         const std::vector<unsigned> &Sizes) {
  auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
  std::vector<const SCEV *> SCEVSizes;

  for (auto size : Sizes)
    if (size)
      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
    else
      SCEVSizes.push_back(nullptr);

  auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
                                       MemoryKind::Array, BaseName.c_str());
  return SAI;
}
// Return the ScopArrayInfo for (BasePtr, Kind) or nullptr if none exists.
// NOTE(review): operator[] default-inserts an empty entry for absent keys;
// intentional here (the map is later populated), but worth knowing.
const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
                                                  MemoryKind Kind) {
  auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
  return SAI;
}
// Like getScopArrayInfoOrNull, but asserts that the array exists.
const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
  auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
  assert(SAI && "No ScopArrayInfo available for this base pointer");
  return SAI;
}
// Textual (isl) representation of the SCoP's context set.
std::string Scop::getContextStr() const { return getContext().to_str(); }
// Textual (isl) representation of the assumed context; only valid after the
// assumed context has been built.
std::string Scop::getAssumedContextStr() const {
  assert(AssumedContext && "Assumed context not yet built");
  return stringFromIslObj(AssumedContext);
}
// Textual (isl) representation of the invalid context.
std::string Scop::getInvalidContextStr() const {
  return stringFromIslObj(InvalidContext);
}
// Human-readable SCoP name: "<entry>---<exit>".
std::string Scop::getNameStr() const {
  std::string ExitName, EntryName;
  std::tie(EntryName, ExitName) = getEntryExitStr();
  return EntryName + "---" + ExitName;
}
// Print the region's entry and exit blocks as operands; a region without an
// exit block (top-level) is named "FunctionExit".
std::pair<std::string, std::string> Scop::getEntryExitStr() const {
  std::string ExitName, EntryName;
  raw_string_ostream ExitStr(ExitName);
  raw_string_ostream EntryStr(EntryName);

  R.getEntry()->printAsOperand(EntryStr, false);
  EntryStr.str(); // flush into EntryName

  if (R.getExit()) {
    R.getExit()->printAsOperand(ExitStr, false);
    ExitStr.str(); // flush into ExitName
  } else
    ExitName = "FunctionExit";

  return std::make_pair(EntryName, ExitName);
}
// Return a fresh copy of the SCoP context (caller owns the isl::set).
isl::set Scop::getContext() const { return isl::manage(isl_set_copy(Context)); }
// Parameter space of the SCoP context.
isl::space Scop::getParamSpace() const { return getContext().get_space(); }
// Build a parameter space containing all SCEV parameters followed by the
// Fortran array ids.
isl::space Scop::getFullParamSpace() const {
  std::vector<isl::id> FortranIDs;
  FortranIDs = getFortranArrayIds(arrays());

  isl::space Space = isl::space::params_alloc(
      getIslCtx(), ParameterIds.size() + FortranIDs.size());

  unsigned PDim = 0;
  for (const SCEV *Parameter : Parameters) {
    isl::id Id = getIdForParam(Parameter);
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
  }

  for (isl::id Id : FortranIDs)
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);

  return Space;
}
// Return a fresh copy of the assumed context; only valid once built.
isl::set Scop::getAssumedContext() const {
  assert(AssumedContext && "Assumed context not yet built");
  return isl::manage(isl_set_copy(AssumedContext));
}
// Heuristic profitability check: the SCoP is worth optimizing if more than
// one loop (iterator) belongs to statements considered optimizable.
bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
  // A user flag can force all SCoPs to be treated as profitable.
  if (PollyProcessUnprofitable)
    return true;

  if (isEmpty())
    return false;

  unsigned OptimizableStmtsOrLoops = 0;
  for (auto &Stmt : *this) {
    // Statements outside any loop do not contribute.
    if (Stmt.getNumIterators() == 0)
      continue;

    bool ContainsArrayAccs = false;
    bool ContainsScalarAccs = false;
    for (auto *MA : Stmt) {
      // NOTE(review): a guard skipping read accesses was lost in extraction
      // (only writes are classified) — confirm against upstream.
      if (MA->isRead())
        continue;
      ContainsArrayAccs |= MA->isLatestArrayKind();
      ContainsScalarAccs |= MA->isLatestScalarKind();
    }

    if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
      OptimizableStmtsOrLoops += Stmt.getNumIterators();
  }

  return OptimizableStmtsOrLoops > 1;
}
// Check that the run-time context is satisfiable: the (non-empty-domain
// constrained) assumed context must be non-empty and not fully contained in
// the invalid context; likewise for the domain parameters and the context.
bool Scop::hasFeasibleRuntimeContext() const {
  auto *PositiveContext = getAssumedContext().release();
  auto *NegativeContext = getInvalidContext().release();
  PositiveContext =
      addNonEmptyDomainConstraints(isl::manage(PositiveContext)).release();
  bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
                      isl_set_is_subset(PositiveContext, NegativeContext));
  isl_set_free(PositiveContext);
  if (!IsFeasible) {
    isl_set_free(NegativeContext);
    return false;
  }

  auto *DomainContext = isl_union_set_params(getDomains().release());
  IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
  IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
  isl_set_free(NegativeContext);
  isl_set_free(DomainContext);

  return IsFeasible;
}
// Human-readable name for an AssumptionKind, used in remark messages.
// NOTE(review): the case labels and the INBOUNDS/ERRORBLOCK strings were
// lost in extraction; restored from the remaining pattern — verify.
static std::string toString(AssumptionKind Kind) {
  switch (Kind) {
  case ALIASING:
    return "No-aliasing";
  case INBOUNDS:
    return "Inbounds";
  case WRAPPING:
    return "No-overflows";
  case UNSIGNED:
    return "Signed-unsigned";
  case COMPLEXITY:
    return "Low complexity";
  case PROFITABLE:
    return "Profitable";
  case ERRORBLOCK:
    return "No-error";
  case INFINITELOOP:
    return "Finite loop";
  case INVARIANTLOAD:
    return "Invariant load";
  case DELINEARIZATION:
    return "Delinearization";
  }
  llvm_unreachable("Unknown AssumptionKind!");
}
// An assumption/restriction is "effective" if it actually changes the
// assumed/invalid context, i.e. it is not already implied.
bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
  if (Sign == AS_ASSUMPTION) {
    // Already implied by the context or the assumed context.
    if (isl_set_is_subset(Context, Set))
      return false;

    if (isl_set_is_subset(AssumedContext, Set))
      return false;
  } else {
    // A restriction disjoint from the context, or already inside the
    // invalid context, adds nothing.
    if (isl_set_is_disjoint(Set, Context))
      return false;

    if (isl_set_is_subset(Set, InvalidContext))
      return false;
  }
  return true;
}
// Update assumption statistics and emit an optimization remark for @p Set.
// Returns true if the assumption should actually be taken (false e.g. for
// ineffective assumptions under PollyRemarksMinimal, or trivial ones).
// NOTE(review): case labels, the Univ free, and the remark stream operators
// were lost in extraction; restored from the visible structure — verify.
bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
                           DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
    return false;

  // Do never emit trivial assumptions as they only clutter the output.
  if (!PollyRemarksMinimal) {
    isl_set *Univ = nullptr;
    if (Sign == AS_ASSUMPTION)
      Univ = isl_set_universe(isl_set_get_space(Set));

    // Trivial: an empty restriction, or an assumption equal to the universe.
    bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
                     (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
    isl_set_free(Univ);

    if (IsTrivial)
      return false;
  }

  // Bump the per-kind statistics counter.
  switch (Kind) {
  case ALIASING:
    AssumptionsAliasing++;
    break;
  case INBOUNDS:
    AssumptionsInbounds++;
    break;
  case WRAPPING:
    AssumptionsWrapping++;
    break;
  case UNSIGNED:
    AssumptionsUnsigned++;
    break;
  case COMPLEXITY:
    AssumptionsComplexity++;
    break;
  case PROFITABLE:
    AssumptionsUnprofitable++;
    break;
  case ERRORBLOCK:
    AssumptionsErrorBlock++;
    break;
  case INFINITELOOP:
    AssumptionsInfiniteLoop++;
    break;
  case INVARIANTLOAD:
    AssumptionsInvariantLoad++;
    break;
  case DELINEARIZATION:
    AssumptionsDelinearization++;
    break;
  }

  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
  std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
  if (BB)
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
                                        R.getEntry())
             << Msg);
  return true;
}
// Take ownership of @p Set and fold it into the assumed context (for
// assumptions) or the invalid context (for restrictions). Untracked
// assumptions are dropped.
void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                         DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  // Simplify the assumptions/restrictions first.
  Set = isl_set_gist_params(Set, getContext().release());

  if (!trackAssumption(Kind, Set, Loc, Sign, BB)) {
    isl_set_free(Set); // __isl_take: we must free on the drop path.
    return;
  }

  if (Sign == AS_ASSUMPTION) {
    AssumedContext = isl_set_intersect(AssumedContext, Set);
    AssumedContext = isl_set_coalesce(AssumedContext);
  } else {
    InvalidContext = isl_set_union(InvalidContext, Set);
    InvalidContext = isl_set_coalesce(InvalidContext);
  }
}
// Defer an assumption: it is stored and only applied later by
// addRecordedAssumptions(), once domains are available.
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  assert((isl_set_is_params(Set) || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}
// Apply all deferred assumptions, using the recording block's domain to
// simplify them where one was given.
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    const Assumption &AS = RecordedAssumptions.pop_back_val();

    // No block: apply the parameter-set assumption directly.
    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB).release();
    if (!Dom) {
      isl_set_free(AS.Set);
      continue;
    }

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S  <==>  A v B  <==>  A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set;
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION, AS.BB);
  }
}
// Invalidate the SCoP for @p Kind by assuming the empty parameter set, which
// makes the run-time context infeasible.
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
  addAssumption(Kind, isl_set_empty(getParamSpace().release()), Loc,
                AS_ASSUMPTION, BB);
}
// Return a fresh copy of the invalid context.
isl::set Scop::getInvalidContext() const {
  return isl::manage(isl_set_copy(InvalidContext));
}
// Print the context, assumed context, invalid context and the parameter
// list of this SCoP.
void Scop::printContext(raw_ostream &OS) const {
  OS << "Context:\n";
  OS.indent(4) << Context << "\n";

  OS.indent(4) << "Assumed Context:\n";
  OS.indent(4) << AssumedContext << "\n";

  OS.indent(4) << "Invalid Context:\n";
  OS.indent(4) << InvalidContext << "\n";

  unsigned Dim = 0;
  for (const SCEV *Parameter : Parameters)
    OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
}
// Print the min/max alias groups: groups without read-only accesses print
// their write accesses once; otherwise one line per read-only access, paired
// with all non-read-only accesses.
// NOTE(review): counter updates, the early return and the closing " ]]\n"
// prints were lost in extraction; restored from the visible structure.
void Scop::printAliasAssumptions(raw_ostream &OS) const {
  int noOfGroups = 0;
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    if (Pair.second.size() == 0)
      noOfGroups += 1;
    else
      noOfGroups += Pair.second.size();
  }

  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
  if (MinMaxAliasGroups.empty()) {
    OS.indent(8) << "n/a\n";
    return;
  }

  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {

    // If the group has no read only accesses print the write accesses.
    if (Pair.second.empty()) {
      OS.indent(8) << "[[";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }

    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
      OS.indent(8) << "[[";
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }
  }
}
// Print all statements of the SCoP, optionally with their instructions.
void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
  OS << "Statements {\n";

  for (const ScopStmt &Stmt : *this) {
    OS << "\n";
    Stmt.print(OS, PrintInstructions);
  }

  OS.indent(4) << "}\n";
}
// Print all arrays twice: once with symbolic sizes and once with the sizes
// expressed as piecewise affine functions.
void Scop::printArrayInfo(raw_ostream &OS) const {
  OS << "Arrays {\n";

  for (auto &Array : arrays())
    Array->print(OS);

  OS.indent(4) << "}\n";

  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";

  for (auto &Array : arrays())
    Array->print(OS, /* SizeAsPwAff */ true);

  OS.indent(4) << "}\n";
}
// Print a full textual description of the SCoP: header, invariant accesses,
// contexts, arrays, alias assumptions and statements.
void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
  OS.indent(4) << "Region: " << getNameStr() << "\n";
  OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
  OS.indent(4) << "Invariant Accesses: {\n";
  for (const auto &IAClass : InvariantEquivClasses) {
    const auto &MAs = IAClass.InvariantAccesses;
    // NOTE(review): the empty/non-empty branch structure was lost in
    // extraction; restored — an empty class prints only its pointer.
    if (MAs.empty()) {
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
    } else {
      MAs.front()->print(OS);
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
                    << "\n";
    }
  }
  OS.indent(4) << "}\n";
  printContext(OS.indent(4));
  printArrayInfo(OS.indent(4));
  printAliasAssumptions(OS);
  printStatements(OS.indent(4), PrintInstructions);
}
4685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger helper: print the full SCoP (with instructions) to dbgs().
LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
4689 isl_ctx
*Scop::getIslCtx() const { return IslCtx
.get(); }
4691 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4693 // First try to use the SCEVAffinator to generate a piecewise defined
4694 // affine function from @p E in the context of @p BB. If that tasks becomes to
4695 // complex the affinator might return a nullptr. In such a case we invalidate
4696 // the SCoP and return a dummy value. This way we do not need to add error
4697 // handling code to all users of this function.
4698 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4700 // TODO: We could use a heuristic and either use:
4701 // SCEVAffinator::takeNonNegativeAssumption
4703 // SCEVAffinator::interpretAsUnsigned
4704 // to deal with unsigned or "NonNegative" SCEVs.
4706 Affinator
.takeNonNegativeAssumption(PWAC
);
4710 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4711 invalidate(COMPLEXITY
, DL
, BB
);
4712 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4715 isl::union_set
Scop::getDomains() const {
4716 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx(), 0);
4717 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4719 for (const ScopStmt
&Stmt
: *this)
4720 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4722 return isl::manage(Domain
);
4725 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4726 PWACtx PWAC
= getPwAff(E
, BB
);
4727 isl_set_free(PWAC
.second
);
4728 return isl::manage(PWAC
.first
);
4732 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4733 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4735 for (ScopStmt
&Stmt
: *this) {
4736 for (MemoryAccess
*MA
: Stmt
) {
4737 if (!Predicate(*MA
))
4740 isl::set Domain
= Stmt
.getDomain();
4741 isl::map AccessDomain
= MA
->getAccessRelation();
4742 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4743 Accesses
= Accesses
.add_map(AccessDomain
);
4747 return Accesses
.coalesce();
4750 isl::union_map
Scop::getMustWrites() {
4751 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4754 isl::union_map
Scop::getMayWrites() {
4755 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4758 isl::union_map
Scop::getWrites() {
4759 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4762 isl::union_map
Scop::getReads() {
4763 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4766 isl::union_map
Scop::getAccesses() {
4767 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4770 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4771 return getAccessesOfType(
4772 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4775 // Check whether @p Node is an extension node.
4777 // @return true if @p Node is an extension node.
4778 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4779 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4780 return isl_bool_error
;
4782 return isl_bool_true
;
4785 bool Scop::containsExtensionNode(__isl_keep isl_schedule
*Schedule
) {
4786 return isl_schedule_foreach_schedule_node_top_down(Schedule
, isNotExtNode
,
4787 nullptr) == isl_stat_error
;
4790 isl::union_map
Scop::getSchedule() const {
4791 auto *Tree
= getScheduleTree().release();
4792 if (containsExtensionNode(Tree
)) {
4793 isl_schedule_free(Tree
);
4796 auto *S
= isl_schedule_get_map(Tree
);
4797 isl_schedule_free(Tree
);
4798 return isl::manage(S
);
4801 isl::schedule
Scop::getScheduleTree() const {
4802 return isl::manage(isl_schedule_intersect_domain(isl_schedule_copy(Schedule
),
4803 getDomains().release()));
4806 void Scop::setSchedule(__isl_take isl_union_map
*NewSchedule
) {
4807 auto *S
= isl_schedule_from_domain(getDomains().release());
4808 S
= isl_schedule_insert_partial_schedule(
4809 S
, isl_multi_union_pw_aff_from_union_map(NewSchedule
));
4810 isl_schedule_free(Schedule
);
4814 void Scop::setScheduleTree(__isl_take isl_schedule
*NewSchedule
) {
4815 isl_schedule_free(Schedule
);
4816 Schedule
= NewSchedule
;
4819 bool Scop::restrictDomains(isl::union_set Domain
) {
4820 bool Changed
= false;
4821 for (ScopStmt
&Stmt
: *this) {
4822 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4823 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4825 if (StmtDomain
.is_subset(NewStmtDomain
))
4830 NewStmtDomain
= NewStmtDomain
.coalesce();
4832 if (NewStmtDomain
.is_empty())
4833 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4835 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
4840 ScalarEvolution
*Scop::getSE() const { return SE
; }
4842 // Create an isl_multi_union_aff that defines an identity mapping from the
4843 // elements of USet to their N-th dimension.
4847 // Domain: { A[i,j]; B[i,j,k] }
4850 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4852 // @param USet A union set describing the elements for which to generate a
4854 // @param N The dimension to map to.
4855 // @returns A mapping from USet to its N-th dimension.
4856 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4859 assert(!USet
.is_empty());
4861 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4863 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4864 int Dim
= S
.dim(isl::dim::set
);
4865 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4868 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4870 Result
= Result
.add_pw_multi_aff(PMA
);
4871 return isl::stat::ok
;
4874 isl::stat Res
= USet
.foreach_set(Lambda
);
4877 assert(Res
== isl::stat::ok
);
4879 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4882 void Scop::addScopStmt(BasicBlock
*BB
, Loop
*SurroundingLoop
,
4883 std::vector
<Instruction
*> Instructions
) {
4884 assert(BB
&& "Unexpected nullptr!");
4885 Stmts
.emplace_back(*this, *BB
, SurroundingLoop
, Instructions
);
4886 auto *Stmt
= &Stmts
.back();
4887 StmtMap
[BB
].push_back(Stmt
);
4888 for (Instruction
*Inst
: Instructions
) {
4889 assert(!InstStmtMap
.count(Inst
) &&
4890 "Unexpected statement corresponding to the instruction.");
4891 InstStmtMap
[Inst
] = Stmt
;
4895 void Scop::addScopStmt(Region
*R
, Loop
*SurroundingLoop
) {
4896 assert(R
&& "Unexpected nullptr!");
4897 Stmts
.emplace_back(*this, *R
, SurroundingLoop
);
4898 auto *Stmt
= &Stmts
.back();
4899 for (BasicBlock
*BB
: R
->blocks()) {
4900 StmtMap
[BB
].push_back(Stmt
);
4901 for (Instruction
&Inst
: *BB
) {
4902 assert(!InstStmtMap
.count(&Inst
) &&
4903 "Unexpected statement corresponding to the instruction.");
4904 InstStmtMap
[&Inst
] = Stmt
;
4909 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4912 isl::set SourceDomain
= SourceRel
.domain();
4913 isl::set TargetDomain
= TargetRel
.domain();
4914 assert(Domain
.is_subset(TargetDomain
) &&
4915 "Target access not defined for complete statement domain");
4916 assert(Domain
.is_subset(SourceDomain
) &&
4917 "Source access not defined for complete statement domain");
4919 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4921 return &(Stmts
.back());
4924 void Scop::buildSchedule(LoopInfo
&LI
) {
4925 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4926 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4927 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4928 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4929 Schedule
= LoopStack
[0].Schedule
;
4932 /// To generate a schedule for the elements in a Region we traverse the Region
4933 /// in reverse-post-order and add the contained RegionNodes in traversal order
4934 /// to the schedule of the loop that is currently at the top of the LoopStack.
4935 /// For loop-free codes, this results in a correct sequential ordering.
4946 /// Including loops requires additional processing. Whenever a loop header is
4947 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4948 /// from an empty schedule, we first process all RegionNodes that are within
4949 /// this loop and complete the sequential schedule at this loop-level before
4950 /// processing about any other nodes. To implement this
4951 /// loop-nodes-first-processing, the reverse post-order traversal is
4952 /// insufficient. Hence, we additionally check if the traversal yields
4953 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4954 /// These region-nodes are then queue and only traverse after the all nodes
4955 /// within the current loop have been processed.
4956 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4957 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4959 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4960 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4961 std::deque
<RegionNode
*> DelayList
;
4962 bool LastRNWaiting
= false;
4964 // Iterate over the region @p R in reverse post-order but queue
4965 // sub-regions/blocks iff they are not part of the last encountered but not
4966 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4967 // that we queued the last sub-region/block from the reverse post-order
4968 // iterator. If it is set we have to explore the next sub-region/block from
4969 // the iterator (if any) to guarantee progress. If it is not set we first try
4970 // the next queued sub-region/blocks.
4971 while (!WorkList
.empty() || !DelayList
.empty()) {
4974 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4975 RN
= WorkList
.front();
4976 WorkList
.pop_front();
4977 LastRNWaiting
= false;
4979 RN
= DelayList
.front();
4980 DelayList
.pop_front();
4983 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4987 Loop
*LastLoop
= LoopStack
.back().L
;
4988 if (LastLoop
!= L
) {
4989 if (LastLoop
&& !LastLoop
->contains(L
)) {
4990 LastRNWaiting
= true;
4991 DelayList
.push_back(RN
);
4994 LoopStack
.push_back({L
, nullptr, 0});
4996 buildSchedule(RN
, LoopStack
, LI
);
5000 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
5001 if (RN
->isSubRegion()) {
5002 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
5003 if (!isNonAffineSubRegion(LocalRegion
)) {
5004 buildSchedule(LocalRegion
, LoopStack
, LI
);
5009 auto &LoopData
= LoopStack
.back();
5010 LoopData
.NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
5012 for (auto *Stmt
: getStmtListFor(RN
)) {
5013 auto *UDomain
= isl_union_set_from_set(Stmt
->getDomain().release());
5014 auto *StmtSchedule
= isl_schedule_from_domain(UDomain
);
5015 LoopData
.Schedule
= combineInSequence(LoopData
.Schedule
, StmtSchedule
);
5018 // Check if we just processed the last node in this loop. If we did, finalize
5021 // - adding new schedule dimensions
5022 // - folding the resulting schedule into the parent loop schedule
5023 // - dropping the loop schedule from the LoopStack.
5025 // Then continue to check surrounding loops, which might also have been
5026 // completed by this node.
5027 while (LoopData
.L
&&
5028 LoopData
.NumBlocksProcessed
== getNumBlocksInLoop(LoopData
.L
)) {
5029 auto *Schedule
= LoopData
.Schedule
;
5030 auto NumBlocksProcessed
= LoopData
.NumBlocksProcessed
;
5032 LoopStack
.pop_back();
5033 auto &NextLoopData
= LoopStack
.back();
5036 isl::union_set Domain
= give(isl_schedule_get_domain(Schedule
));
5037 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, LoopStack
.size());
5038 Schedule
= isl_schedule_insert_partial_schedule(Schedule
, MUPA
.release());
5039 NextLoopData
.Schedule
=
5040 combineInSequence(NextLoopData
.Schedule
, Schedule
);
5043 NextLoopData
.NumBlocksProcessed
+= NumBlocksProcessed
;
5044 LoopData
= NextLoopData
;
5048 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
5049 auto StmtMapIt
= StmtMap
.find(BB
);
5050 if (StmtMapIt
== StmtMap
.end())
5052 assert(StmtMapIt
->second
.size() == 1 &&
5053 "Each statement corresponds to exactly one BB.");
5054 return StmtMapIt
->second
;
5057 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
5058 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
5059 if (!StmtList
.empty())
5060 return StmtList
.back();
5064 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
5065 if (RN
->isSubRegion())
5066 return getStmtListFor(RN
->getNodeAs
<Region
>());
5067 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
5070 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
5071 return getStmtListFor(R
->getEntry());
5074 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
5075 if (!L
|| !R
.contains(L
))
5077 // outermostLoopInRegion always returns nullptr for top level regions
5078 if (R
.isTopLevelRegion()) {
5079 // LoopInfo's depths start at 1, we start at 0
5080 return L
->getLoopDepth() - 1;
5082 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
5084 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
5088 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
5089 for (auto &SAI
: arrays()) {
5090 if (SAI
->getName() == BaseName
)
5096 void Scop::addAccessData(MemoryAccess
*Access
) {
5097 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
5098 assert(SAI
&& "can only use after access relations have been constructed");
5100 if (Access
->isOriginalValueKind() && Access
->isRead())
5101 ValueUseAccs
[SAI
].push_back(Access
);
5102 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
5103 PHIIncomingAccs
[SAI
].push_back(Access
);
5106 void Scop::removeAccessData(MemoryAccess
*Access
) {
5107 if (Access
->isOriginalValueKind() && Access
->isRead()) {
5108 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
5109 std::remove(Uses
.begin(), Uses
.end(), Access
);
5110 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
5111 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
5112 std::remove(Incomings
.begin(), Incomings
.end(), Access
);
5116 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
5117 assert(SAI
->isValueKind());
5119 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
5123 ScopStmt
*Stmt
= getStmtFor(Val
);
5127 return Stmt
->lookupValueWriteOf(Val
);
5130 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
5131 assert(SAI
->isValueKind());
5132 auto It
= ValueUseAccs
.find(SAI
);
5133 if (It
== ValueUseAccs
.end())
5138 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
5139 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
5141 if (SAI
->isExitPHIKind())
5144 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
5145 ScopStmt
*Stmt
= getStmtFor(PHI
);
5146 assert(Stmt
&& "PHINode must be within the SCoP");
5148 return Stmt
->lookupPHIReadOf(PHI
);
5151 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
5152 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
5153 auto It
= PHIIncomingAccs
.find(SAI
);
5154 if (It
== PHIIncomingAccs
.end())
5159 bool Scop::isEscaping(Instruction
*Inst
) {
5160 assert(contains(Inst
) && "The concept of escaping makes only sense for "
5161 "values defined inside the SCoP");
5163 for (Use
&Use
: Inst
->uses()) {
5164 BasicBlock
*UserBB
= getUseBlock(Use
);
5165 if (!contains(UserBB
))
5168 // When the SCoP region exit needs to be simplified, PHIs in the region exit
5169 // move to a new basic block such that its incoming blocks are not in the
5171 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
5172 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
5178 Scop::ScopStatistics
Scop::getStatistics() const {
5179 ScopStatistics Result
;
5180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5181 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
5183 int NumTotalLoops
= LoopStat
.NumLoops
;
5184 Result
.NumBoxedLoops
= getBoxedLoops().size();
5185 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
5187 for (const ScopStmt
&Stmt
: *this) {
5188 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
5189 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
5190 for (MemoryAccess
*MA
: Stmt
) {
5194 if (MA
->isLatestValueKind()) {
5195 Result
.NumValueWrites
+= 1;
5197 Result
.NumValueWritesInLoops
+= 1;
5200 if (MA
->isLatestAnyPHIKind()) {
5201 Result
.NumPHIWrites
+= 1;
5203 Result
.NumPHIWritesInLoops
+= 1;
5207 MA
->getAccessRelation().intersect_domain(Domain
).range();
5208 if (AccSet
.is_singleton()) {
5209 Result
.NumSingletonWrites
+= 1;
5211 Result
.NumSingletonWritesInLoops
+= 1;
5219 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
5220 scop
.print(OS
, PollyPrintInstructions
);
5224 //===----------------------------------------------------------------------===//
5225 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5226 AU
.addRequired
<LoopInfoWrapperPass
>();
5227 AU
.addRequired
<RegionInfoPass
>();
5228 AU
.addRequired
<DominatorTreeWrapperPass
>();
5229 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5230 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5231 AU
.addRequired
<AAResultsWrapperPass
>();
5232 AU
.addRequired
<AssumptionCacheTracker
>();
5233 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5234 AU
.setPreservesAll();
5237 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
5238 Scop::ScopStatistics ScopStats
) {
5239 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
5242 NumLoopsInScop
+= Stats
.NumLoops
;
5244 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
5246 if (Stats
.MaxDepth
== 1)
5248 else if (Stats
.MaxDepth
== 2)
5250 else if (Stats
.MaxDepth
== 3)
5251 NumScopsDepthThree
++;
5252 else if (Stats
.MaxDepth
== 4)
5253 NumScopsDepthFour
++;
5254 else if (Stats
.MaxDepth
== 5)
5255 NumScopsDepthFive
++;
5257 NumScopsDepthLarger
++;
5259 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
5260 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
5262 NumValueWrites
+= ScopStats
.NumValueWrites
;
5263 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
5264 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
5265 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
5266 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
5267 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
5270 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
5271 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5273 if (!SD
.isMaxRegionInScop(*R
))
5276 Function
*F
= R
->getEntry()->getParent();
5277 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5278 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5279 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5280 auto const &DL
= F
->getParent()->getDataLayout();
5281 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5282 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
5283 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5285 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5286 S
= SB
.getScop(); // take ownership of scop object
5288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5290 ScopDetection::LoopStats Stats
=
5291 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5292 updateLoopCountStatistic(Stats
, S
->getStatistics());
5299 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5301 S
->print(OS
, PollyPrintInstructions
);
5303 OS
<< "Invalid Scop!\n";
5306 char ScopInfoRegionPass::ID
= 0;
5308 Pass
*polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5310 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5311 "Polly - Create polyhedral description of Scops", false,
5313 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5314 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5315 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5316 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5317 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5318 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5319 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5320 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5321 "Polly - Create polyhedral description of Scops", false,
5324 //===----------------------------------------------------------------------===//
5325 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5326 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5327 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5328 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5332 void ScopInfo::recompute() {
5333 RegionToScopMap
.clear();
5334 /// Create polyhedral description of scops for all the valid regions of a
5336 for (auto &It
: SD
) {
5337 Region
*R
= const_cast<Region
*>(It
);
5338 if (!SD
.isMaxRegionInScop(*R
))
5341 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5342 std::unique_ptr
<Scop
> S
= SB
.getScop();
5345 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5346 ScopDetection::LoopStats Stats
=
5347 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5348 updateLoopCountStatistic(Stats
, S
->getStatistics());
5350 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5351 assert(Inserted
&& "Building Scop for the same region twice!");
5356 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5357 FunctionAnalysisManager::Invalidator
&Inv
) {
5358 // Check whether the analysis, all analyses on functions have been preserved
5359 // or anything we're holding references to is being invalidated
5360 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5361 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5362 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5363 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5364 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5365 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5366 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5367 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
5370 AnalysisKey
ScopInfoAnalysis::Key
;
5372 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5373 FunctionAnalysisManager
&FAM
) {
5374 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5375 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5376 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5377 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5378 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5379 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5380 auto &DL
= F
.getParent()->getDataLayout();
5381 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5382 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5385 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5386 FunctionAnalysisManager
&FAM
) {
5387 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5388 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5389 // order here to keep the output persistent
5390 for (auto &It
: reverse(SI
)) {
5392 It
.second
->print(Stream
, PollyPrintInstructions
);
5394 Stream
<< "Invalid Scop!\n";
5396 return PreservedAnalyses::all();
5399 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5400 AU
.addRequired
<LoopInfoWrapperPass
>();
5401 AU
.addRequired
<RegionInfoPass
>();
5402 AU
.addRequired
<DominatorTreeWrapperPass
>();
5403 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5404 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5405 AU
.addRequired
<AAResultsWrapperPass
>();
5406 AU
.addRequired
<AssumptionCacheTracker
>();
5407 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5408 AU
.setPreservesAll();
5411 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5412 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5413 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5414 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5415 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5416 auto const &DL
= F
.getParent()->getDataLayout();
5417 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5418 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5419 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5421 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5425 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5426 for (auto &It
: *Result
) {
5428 It
.second
->print(OS
, PollyPrintInstructions
);
5430 OS
<< "Invalid Scop!\n";
5434 char ScopInfoWrapperPass::ID
= 0;
5436 Pass
*polly::createScopInfoWrapperPassPass() {
5437 return new ScopInfoWrapperPass();
5440 INITIALIZE_PASS_BEGIN(
5441 ScopInfoWrapperPass
, "polly-function-scops",
5442 "Polly - Create polyhedral description of all Scops of a function", false,
5444 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5445 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5446 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5447 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5448 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5449 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5450 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5451 INITIALIZE_PASS_END(
5452 ScopInfoWrapperPass
, "polly-function-scops",
5453 "Polly - Create polyhedral description of all Scops of a function", false,