//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <algorithm>
#include <cassert>
#include <string>
106 using namespace llvm
;
107 using namespace polly
;
109 #define DEBUG_TYPE "polly-scops"
111 STATISTIC(AssumptionsAliasing
, "Number of aliasing assumptions taken.");
112 STATISTIC(AssumptionsInbounds
, "Number of inbounds assumptions taken.");
113 STATISTIC(AssumptionsWrapping
, "Number of wrapping assumptions taken.");
114 STATISTIC(AssumptionsUnsigned
, "Number of unsigned assumptions taken.");
115 STATISTIC(AssumptionsComplexity
, "Number of too complex SCoPs.");
116 STATISTIC(AssumptionsUnprofitable
, "Number of unprofitable SCoPs.");
117 STATISTIC(AssumptionsErrorBlock
, "Number of error block assumptions taken.");
118 STATISTIC(AssumptionsInfiniteLoop
, "Number of bounded loop assumptions taken.");
119 STATISTIC(AssumptionsInvariantLoad
,
120 "Number of invariant loads assumptions taken.");
121 STATISTIC(AssumptionsDelinearization
,
122 "Number of delinearization assumptions taken.");
124 STATISTIC(NumScops
, "Number of feasible SCoPs after ScopInfo");
125 STATISTIC(NumLoopsInScop
, "Number of loops in scops");
126 STATISTIC(NumBoxedLoops
, "Number of boxed loops in SCoPs after ScopInfo");
127 STATISTIC(NumAffineLoops
, "Number of affine loops in SCoPs after ScopInfo");
129 STATISTIC(NumScopsDepthOne
, "Number of scops with maximal loop depth 1");
130 STATISTIC(NumScopsDepthTwo
, "Number of scops with maximal loop depth 2");
131 STATISTIC(NumScopsDepthThree
, "Number of scops with maximal loop depth 3");
132 STATISTIC(NumScopsDepthFour
, "Number of scops with maximal loop depth 4");
133 STATISTIC(NumScopsDepthFive
, "Number of scops with maximal loop depth 5");
134 STATISTIC(NumScopsDepthLarger
,
135 "Number of scops with maximal loop depth 6 and larger");
136 STATISTIC(MaxNumLoopsInScop
, "Maximal number of loops in scops");
138 STATISTIC(NumValueWrites
, "Number of scalar value writes after ScopInfo");
140 NumValueWritesInLoops
,
141 "Number of scalar value writes nested in affine loops after ScopInfo");
142 STATISTIC(NumPHIWrites
, "Number of scalar phi writes after ScopInfo");
143 STATISTIC(NumPHIWritesInLoops
,
144 "Number of scalar phi writes nested in affine loops after ScopInfo");
145 STATISTIC(NumSingletonWrites
, "Number of singleton writes after ScopInfo");
146 STATISTIC(NumSingletonWritesInLoops
,
147 "Number of singleton writes nested in affine loops after ScopInfo");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;
166 OptComputeOut("polly-analysis-computeout",
167 cl::desc("Bound the scop analysis by a maximal amount of "
168 "computational steps (0 means no bound)"),
169 cl::Hidden
, cl::init(800000), cl::ZeroOrMore
,
170 cl::cat(PollyCategory
));
172 static cl::opt
<bool> PollyRemarksMinimal(
173 "polly-remarks-minimal",
174 cl::desc("Do not emit remarks about assumptions that are known"),
175 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
177 static cl::opt
<int> RunTimeChecksMaxAccessDisjuncts(
178 "polly-rtc-max-array-disjuncts",
179 cl::desc("The maximal number of disjunts allowed in memory accesses to "
181 cl::Hidden
, cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
183 static cl::opt
<unsigned> RunTimeChecksMaxParameters(
184 "polly-rtc-max-parameters",
185 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden
,
186 cl::ZeroOrMore
, cl::init(8), cl::cat(PollyCategory
));
188 static cl::opt
<unsigned> RunTimeChecksMaxArraysPerGroup(
189 "polly-rtc-max-arrays-per-group",
190 cl::desc("The maximal number of arrays to compare in each alias group."),
191 cl::Hidden
, cl::ZeroOrMore
, cl::init(20), cl::cat(PollyCategory
));
193 static cl::opt
<std::string
> UserContextStr(
194 "polly-context", cl::value_desc("isl parameter set"),
195 cl::desc("Provide additional constraints on the context parameters"),
196 cl::init(""), cl::cat(PollyCategory
));
199 IslOnErrorAbort("polly-on-isl-error-abort",
200 cl::desc("Abort if an isl error is encountered"),
201 cl::init(true), cl::cat(PollyCategory
));
203 static cl::opt
<bool> PollyPreciseInbounds(
204 "polly-precise-inbounds",
205 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
206 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
209 PollyIgnoreInbounds("polly-ignore-inbounds",
210 cl::desc("Do not take inbounds assumptions at all"),
211 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
213 static cl::opt
<bool> PollyIgnoreParamBounds(
214 "polly-ignore-parameter-bounds",
216 "Do not add parameter bounds and do no gist simplify sets accordingly"),
217 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
219 static cl::opt
<bool> PollyAllowDereferenceOfAllFunctionParams(
220 "polly-allow-dereference-of-all-function-parameters",
222 "Treat all parameters to functions that are pointers as dereferencible."
223 " This is useful for invariant load hoisting, since we can generate"
224 " less runtime checks. This is only valid if all pointers to functions"
225 " are always initialized, so that Polly can choose to hoist"
227 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
229 static cl::opt
<bool> PollyPreciseFoldAccesses(
230 "polly-precise-fold-accesses",
231 cl::desc("Fold memory accesses to model more possible delinearizations "
232 "(does not scale well)"),
233 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
235 bool polly::UseInstructionNames
;
237 static cl::opt
<bool, true> XUseInstructionNames(
238 "polly-use-llvm-names",
239 cl::desc("Use LLVM-IR names when deriving statement names"),
240 cl::location(UseInstructionNames
), cl::Hidden
, cl::init(false),
241 cl::ZeroOrMore
, cl::cat(PollyCategory
));
243 static cl::opt
<bool> PollyPrintInstructions(
244 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
245 cl::Hidden
, cl::Optional
, cl::init(false), cl::cat(PollyCategory
));
247 //===----------------------------------------------------------------------===//
249 // Create a sequence of two schedules. Either argument may be null and is
250 // interpreted as the empty schedule. Can also return null if both schedules are
252 static isl::schedule
combineInSequence(isl::schedule Prev
, isl::schedule Succ
) {
258 return Prev
.sequence(Succ
);
261 static isl::set
addRangeBoundsToSet(isl::set S
, const ConstantRange
&Range
,
262 int dim
, isl::dim type
) {
264 isl::ctx Ctx
= S
.get_ctx();
266 // The upper and lower bound for a parameter value is derived either from
267 // the data type of the parameter or from the - possibly more restrictive -
269 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMin(), true);
270 S
= S
.lower_bound_val(type
, dim
, V
);
271 V
= valFromAPInt(Ctx
.get(), Range
.getSignedMax(), true);
272 S
= S
.upper_bound_val(type
, dim
, V
);
274 if (Range
.isFullSet())
277 if (isl_set_n_basic_set(S
.get()) > MaxDisjunctsInContext
)
280 // In case of signed wrapping, we can refine the set of valid values by
281 // excluding the part not covered by the wrapping range.
282 if (Range
.isSignWrappedSet()) {
283 V
= valFromAPInt(Ctx
.get(), Range
.getLower(), true);
284 isl::set SLB
= S
.lower_bound_val(type
, dim
, V
);
286 V
= valFromAPInt(Ctx
.get(), Range
.getUpper(), true);
288 isl::set SUB
= S
.upper_bound_val(type
, dim
, V
);
295 static const ScopArrayInfo
*identifyBasePtrOriginSAI(Scop
*S
, Value
*BasePtr
) {
296 LoadInst
*BasePtrLI
= dyn_cast
<LoadInst
>(BasePtr
);
300 if (!S
->contains(BasePtrLI
))
303 ScalarEvolution
&SE
= *S
->getSE();
305 auto *OriginBaseSCEV
=
306 SE
.getPointerBase(SE
.getSCEV(BasePtrLI
->getPointerOperand()));
310 auto *OriginBaseSCEVUnknown
= dyn_cast
<SCEVUnknown
>(OriginBaseSCEV
);
311 if (!OriginBaseSCEVUnknown
)
314 return S
->getScopArrayInfo(OriginBaseSCEVUnknown
->getValue(),
318 ScopArrayInfo::ScopArrayInfo(Value
*BasePtr
, Type
*ElementType
, isl::ctx Ctx
,
319 ArrayRef
<const SCEV
*> Sizes
, MemoryKind Kind
,
320 const DataLayout
&DL
, Scop
*S
,
321 const char *BaseName
)
322 : BasePtr(BasePtr
), ElementType(ElementType
), Kind(Kind
), DL(DL
), S(*S
) {
323 std::string BasePtrName
=
325 : getIslCompatibleName("MemRef", BasePtr
, S
->getNextArrayIdx(),
326 Kind
== MemoryKind::PHI
? "__phi" : "",
327 UseInstructionNames
);
328 Id
= isl::id::alloc(Ctx
, BasePtrName
, this);
332 if (!BasePtr
|| Kind
!= MemoryKind::Array
) {
333 BasePtrOriginSAI
= nullptr;
337 BasePtrOriginSAI
= identifyBasePtrOriginSAI(S
, BasePtr
);
338 if (BasePtrOriginSAI
)
339 const_cast<ScopArrayInfo
*>(BasePtrOriginSAI
)->addDerivedSAI(this);
342 ScopArrayInfo::~ScopArrayInfo() = default;
344 isl::space
ScopArrayInfo::getSpace() const {
345 auto Space
= isl::space(Id
.get_ctx(), 0, getNumberOfDimensions());
346 Space
= Space
.set_tuple_id(isl::dim::set
, Id
);
350 bool ScopArrayInfo::isReadOnly() {
351 isl::union_set WriteSet
= S
.getWrites().range();
352 isl::space Space
= getSpace();
353 WriteSet
= WriteSet
.extract_set(Space
);
355 return bool(WriteSet
.is_empty());
358 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo
*Array
) const {
359 if (Array
->getElementType() != getElementType())
362 if (Array
->getNumberOfDimensions() != getNumberOfDimensions())
365 for (unsigned i
= 0; i
< getNumberOfDimensions(); i
++)
366 if (Array
->getDimensionSize(i
) != getDimensionSize(i
))
372 void ScopArrayInfo::updateElementType(Type
*NewElementType
) {
373 if (NewElementType
== ElementType
)
376 auto OldElementSize
= DL
.getTypeAllocSizeInBits(ElementType
);
377 auto NewElementSize
= DL
.getTypeAllocSizeInBits(NewElementType
);
379 if (NewElementSize
== OldElementSize
|| NewElementSize
== 0)
382 if (NewElementSize
% OldElementSize
== 0 && NewElementSize
< OldElementSize
) {
383 ElementType
= NewElementType
;
385 auto GCD
= GreatestCommonDivisor64(NewElementSize
, OldElementSize
);
386 ElementType
= IntegerType::get(ElementType
->getContext(), GCD
);
390 /// Make the ScopArrayInfo model a Fortran Array
391 void ScopArrayInfo::applyAndSetFAD(Value
*FAD
) {
392 assert(FAD
&& "got invalid Fortran array descriptor");
394 assert(this->FAD
== FAD
&&
395 "receiving different array descriptors for same array");
399 assert(DimensionSizesPw
.size() > 0 && !DimensionSizesPw
[0]);
403 isl::space
Space(S
.getIslCtx(), 1, 0);
405 std::string param_name
= getName();
406 param_name
+= "_fortranarr_size";
407 isl::id IdPwAff
= isl::id::alloc(S
.getIslCtx(), param_name
, this);
409 Space
= Space
.set_dim_id(isl::dim::param
, 0, IdPwAff
);
411 isl::aff::var_on_domain(isl::local_space(Space
), isl::dim::param
, 0);
413 DimensionSizesPw
[0] = PwAff
;
416 bool ScopArrayInfo::updateSizes(ArrayRef
<const SCEV
*> NewSizes
,
417 bool CheckConsistency
) {
418 int SharedDims
= std::min(NewSizes
.size(), DimensionSizes
.size());
419 int ExtraDimsNew
= NewSizes
.size() - SharedDims
;
420 int ExtraDimsOld
= DimensionSizes
.size() - SharedDims
;
422 if (CheckConsistency
) {
423 for (int i
= 0; i
< SharedDims
; i
++) {
424 auto *NewSize
= NewSizes
[i
+ ExtraDimsNew
];
425 auto *KnownSize
= DimensionSizes
[i
+ ExtraDimsOld
];
426 if (NewSize
&& KnownSize
&& NewSize
!= KnownSize
)
430 if (DimensionSizes
.size() >= NewSizes
.size())
434 DimensionSizes
.clear();
435 DimensionSizes
.insert(DimensionSizes
.begin(), NewSizes
.begin(),
437 DimensionSizesPw
.clear();
438 for (const SCEV
*Expr
: DimensionSizes
) {
440 DimensionSizesPw
.push_back(nullptr);
443 isl::pw_aff Size
= S
.getPwAffOnly(Expr
);
444 DimensionSizesPw
.push_back(Size
);
449 std::string
ScopArrayInfo::getName() const { return Id
.get_name(); }
451 int ScopArrayInfo::getElemSizeInBytes() const {
452 return DL
.getTypeAllocSize(ElementType
);
455 isl::id
ScopArrayInfo::getBasePtrId() const { return Id
; }
457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
458 LLVM_DUMP_METHOD
void ScopArrayInfo::dump() const { print(errs()); }
461 void ScopArrayInfo::print(raw_ostream
&OS
, bool SizeAsPwAff
) const {
462 OS
.indent(8) << *getElementType() << " " << getName();
464 // If this is a Fortran array, then we can print the outermost dimension
465 // as a isl_pw_aff even though there is no SCEV information.
466 bool IsOutermostSizeKnown
= SizeAsPwAff
&& FAD
;
468 if (!IsOutermostSizeKnown
&& getNumberOfDimensions() > 0 &&
469 !getDimensionSize(0)) {
473 for (; u
< getNumberOfDimensions(); u
++) {
477 isl::pw_aff Size
= getDimensionSizePw(u
);
478 OS
<< " " << Size
<< " ";
480 OS
<< *getDimensionSize(u
);
488 if (BasePtrOriginSAI
)
489 OS
<< " [BasePtrOrigin: " << BasePtrOriginSAI
->getName() << "]";
491 OS
<< " // Element size " << getElemSizeInBytes() << "\n";
494 const ScopArrayInfo
*
495 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA
) {
496 isl::id Id
= PMA
.get_tuple_id(isl::dim::out
);
497 assert(!Id
.is_null() && "Output dimension didn't have an ID");
498 return getFromId(Id
);
501 const ScopArrayInfo
*ScopArrayInfo::getFromId(isl::id Id
) {
502 void *User
= Id
.get_user();
503 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
507 void MemoryAccess::wrapConstantDimensions() {
508 auto *SAI
= getScopArrayInfo();
509 isl::space ArraySpace
= SAI
->getSpace();
510 isl::ctx Ctx
= ArraySpace
.get_ctx();
511 unsigned DimsArray
= SAI
->getNumberOfDimensions();
513 isl::multi_aff DivModAff
= isl::multi_aff::identity(
514 ArraySpace
.map_from_domain_and_range(ArraySpace
));
515 isl::local_space LArraySpace
= isl::local_space(ArraySpace
);
517 // Begin with last dimension, to iteratively carry into higher dimensions.
518 for (int i
= DimsArray
- 1; i
> 0; i
--) {
519 auto *DimSize
= SAI
->getDimensionSize(i
);
520 auto *DimSizeCst
= dyn_cast
<SCEVConstant
>(DimSize
);
522 // This transformation is not applicable to dimensions with dynamic size.
526 // This transformation is not applicable to dimensions of size zero.
527 if (DimSize
->isZero())
530 isl::val DimSizeVal
=
531 valFromAPInt(Ctx
.get(), DimSizeCst
->getAPInt(), false);
532 isl::aff Var
= isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
);
534 isl::aff::var_on_domain(LArraySpace
, isl::dim::set
, i
- 1);
536 // Compute: index % size
537 // Modulo must apply in the divide of the previous iteration, if any.
538 isl::aff Modulo
= Var
.mod(DimSizeVal
);
539 Modulo
= Modulo
.pullback(DivModAff
);
541 // Compute: floor(index / size)
542 isl::aff Divide
= Var
.div(isl::aff(LArraySpace
, DimSizeVal
));
543 Divide
= Divide
.floor();
544 Divide
= Divide
.add(PrevVar
);
545 Divide
= Divide
.pullback(DivModAff
);
547 // Apply Modulo and Divide.
548 DivModAff
= DivModAff
.set_aff(i
, Modulo
);
549 DivModAff
= DivModAff
.set_aff(i
- 1, Divide
);
552 // Apply all modulo/divides on the accesses.
553 isl::map Relation
= AccessRelation
;
554 Relation
= Relation
.apply_range(isl::map::from_multi_aff(DivModAff
));
555 Relation
= Relation
.detect_equalities();
556 AccessRelation
= Relation
;
559 void MemoryAccess::updateDimensionality() {
560 auto *SAI
= getScopArrayInfo();
561 isl::space ArraySpace
= SAI
->getSpace();
562 isl::space AccessSpace
= AccessRelation
.get_space().range();
563 isl::ctx Ctx
= ArraySpace
.get_ctx();
565 auto DimsArray
= ArraySpace
.dim(isl::dim::set
);
566 auto DimsAccess
= AccessSpace
.dim(isl::dim::set
);
567 auto DimsMissing
= DimsArray
- DimsAccess
;
569 auto *BB
= getStatement()->getEntryBlock();
570 auto &DL
= BB
->getModule()->getDataLayout();
571 unsigned ArrayElemSize
= SAI
->getElemSizeInBytes();
572 unsigned ElemBytes
= DL
.getTypeAllocSize(getElementType());
574 isl::map Map
= isl::map::from_domain_and_range(
575 isl::set::universe(AccessSpace
), isl::set::universe(ArraySpace
));
577 for (unsigned i
= 0; i
< DimsMissing
; i
++)
578 Map
= Map
.fix_si(isl::dim::out
, i
, 0);
580 for (unsigned i
= DimsMissing
; i
< DimsArray
; i
++)
581 Map
= Map
.equate(isl::dim::in
, i
- DimsMissing
, isl::dim::out
, i
);
583 AccessRelation
= AccessRelation
.apply_range(Map
);
585 // For the non delinearized arrays, divide the access function of the last
586 // subscript by the size of the elements in the array.
588 // A stride one array access in C expressed as A[i] is expressed in
589 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
590 // two subsequent values of 'i' index two values that are stored next to
591 // each other in memory. By this division we make this characteristic
592 // obvious again. If the base pointer was accessed with offsets not divisible
593 // by the accesses element size, we will have chosen a smaller ArrayElemSize
594 // that divides the offsets of all accesses to this base pointer.
595 if (DimsAccess
== 1) {
596 isl::val V
= isl::val(Ctx
, ArrayElemSize
);
597 AccessRelation
= AccessRelation
.floordiv_val(V
);
600 // We currently do this only if we added at least one dimension, which means
601 // some dimension's indices have not been specified, an indicator that some
602 // index values have been added together.
603 // TODO: Investigate general usefulness; Effect on unit tests is to make index
604 // expressions more complicated.
606 wrapConstantDimensions();
609 computeBoundsOnAccessRelation(ArrayElemSize
);
611 // Introduce multi-element accesses in case the type loaded by this memory
612 // access is larger than the canonical element type of the array.
614 // An access ((float *)A)[i] to an array char *A is modeled as
615 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
616 if (ElemBytes
> ArrayElemSize
) {
617 assert(ElemBytes
% ArrayElemSize
== 0 &&
618 "Loaded element size should be multiple of canonical element size");
619 isl::map Map
= isl::map::from_domain_and_range(
620 isl::set::universe(ArraySpace
), isl::set::universe(ArraySpace
));
621 for (unsigned i
= 0; i
< DimsArray
- 1; i
++)
622 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
627 LS
= isl::local_space(Map
.get_space());
628 int Num
= ElemBytes
/ getScopArrayInfo()->getElemSizeInBytes();
630 C
= isl::constraint::alloc_inequality(LS
);
631 C
= C
.set_constant_val(isl::val(Ctx
, Num
- 1));
632 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, 1);
633 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, -1);
634 Map
= Map
.add_constraint(C
);
636 C
= isl::constraint::alloc_inequality(LS
);
637 C
= C
.set_coefficient_si(isl::dim::in
, DimsArray
- 1, -1);
638 C
= C
.set_coefficient_si(isl::dim::out
, DimsArray
- 1, 1);
639 C
= C
.set_constant_val(isl::val(Ctx
, 0));
640 Map
= Map
.add_constraint(C
);
641 AccessRelation
= AccessRelation
.apply_range(Map
);
646 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT
) {
648 case MemoryAccess::RT_NONE
:
649 llvm_unreachable("Requested a reduction operator string for a memory "
650 "access which isn't a reduction");
651 case MemoryAccess::RT_ADD
:
653 case MemoryAccess::RT_MUL
:
655 case MemoryAccess::RT_BOR
:
657 case MemoryAccess::RT_BXOR
:
659 case MemoryAccess::RT_BAND
:
662 llvm_unreachable("Unknown reduction type");
665 const ScopArrayInfo
*MemoryAccess::getOriginalScopArrayInfo() const {
666 isl::id ArrayId
= getArrayId();
667 void *User
= ArrayId
.get_user();
668 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
672 const ScopArrayInfo
*MemoryAccess::getLatestScopArrayInfo() const {
673 isl::id ArrayId
= getLatestArrayId();
674 void *User
= ArrayId
.get_user();
675 const ScopArrayInfo
*SAI
= static_cast<ScopArrayInfo
*>(User
);
679 isl::id
MemoryAccess::getOriginalArrayId() const {
680 return AccessRelation
.get_tuple_id(isl::dim::out
);
683 isl::id
MemoryAccess::getLatestArrayId() const {
684 if (!hasNewAccessRelation())
685 return getOriginalArrayId();
686 return NewAccessRelation
.get_tuple_id(isl::dim::out
);
689 isl::map
MemoryAccess::getAddressFunction() const {
690 return getAccessRelation().lexmin();
694 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule
) const {
695 isl::map Schedule
, ScheduledAccRel
;
696 isl::union_set UDomain
;
698 UDomain
= getStatement()->getDomain();
699 USchedule
= USchedule
.intersect_domain(UDomain
);
700 Schedule
= isl::map::from_union_map(USchedule
);
701 ScheduledAccRel
= getAddressFunction().apply_domain(Schedule
);
702 return isl::pw_multi_aff::from_map(ScheduledAccRel
);
705 isl::map
MemoryAccess::getOriginalAccessRelation() const {
706 return AccessRelation
;
709 std::string
MemoryAccess::getOriginalAccessRelationStr() const {
710 return AccessRelation
.to_str();
713 isl::space
MemoryAccess::getOriginalAccessRelationSpace() const {
714 return AccessRelation
.get_space();
717 isl::map
MemoryAccess::getNewAccessRelation() const {
718 return NewAccessRelation
;
721 std::string
MemoryAccess::getNewAccessRelationStr() const {
722 return NewAccessRelation
.to_str();
725 std::string
MemoryAccess::getAccessRelationStr() const {
726 return getAccessRelation().to_str();
729 isl::basic_map
MemoryAccess::createBasicAccessMap(ScopStmt
*Statement
) {
730 isl::space Space
= isl::space(Statement
->getIslCtx(), 0, 1);
731 Space
= Space
.align_params(Statement
->getDomainSpace());
733 return isl::basic_map::from_domain_and_range(
734 isl::basic_set::universe(Statement
->getDomainSpace()),
735 isl::basic_set::universe(Space
));
738 // Formalize no out-of-bound access assumption
740 // When delinearizing array accesses we optimistically assume that the
741 // delinearized accesses do not access out of bound locations (the subscript
742 // expression of each array evaluates for each statement instance that is
743 // executed to a value that is larger than zero and strictly smaller than the
744 // size of the corresponding dimension). The only exception is the outermost
745 // dimension for which we do not need to assume any upper bound. At this point
746 // we formalize this assumption to ensure that at code generation time the
747 // relevant run-time checks can be generated.
749 // To find the set of constraints necessary to avoid out of bound accesses, we
750 // first build the set of data locations that are not within array bounds. We
751 // then apply the reverse access relation to obtain the set of iterations that
752 // may contain invalid accesses and reduce this set of iterations to the ones
753 // that are actually executed by intersecting them with the domain of the
754 // statement. If we now project out all loop dimensions, we obtain a set of
755 // parameters that may cause statement instances to be executed that may
756 // possibly yield out of bound memory accesses. The complement of these
757 // constraints is the set of constraints that needs to be assumed to ensure such
758 // statement instances are never executed.
759 void MemoryAccess::assumeNoOutOfBound() {
760 if (PollyIgnoreInbounds
)
762 auto *SAI
= getScopArrayInfo();
763 isl::space Space
= getOriginalAccessRelationSpace().range();
764 isl::set Outside
= isl::set::empty(Space
);
765 for (int i
= 1, Size
= Space
.dim(isl::dim::set
); i
< Size
; ++i
) {
766 isl::local_space
LS(Space
);
767 isl::pw_aff Var
= isl::pw_aff::var_on_domain(LS
, isl::dim::set
, i
);
768 isl::pw_aff Zero
= isl::pw_aff(LS
);
770 isl::set DimOutside
= Var
.lt_set(Zero
);
771 isl::pw_aff SizeE
= SAI
->getDimensionSizePw(i
);
772 SizeE
= SizeE
.add_dims(isl::dim::in
, Space
.dim(isl::dim::set
));
773 SizeE
= SizeE
.set_tuple_id(isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
774 DimOutside
= DimOutside
.unite(SizeE
.le_set(Var
));
776 Outside
= Outside
.unite(DimOutside
);
779 Outside
= Outside
.apply(getAccessRelation().reverse());
780 Outside
= Outside
.intersect(Statement
->getDomain());
781 Outside
= Outside
.params();
783 // Remove divs to avoid the construction of overly complicated assumptions.
784 // Doing so increases the set of parameter combinations that are assumed to
785 // not appear. This is always save, but may make the resulting run-time check
786 // bail out more often than strictly necessary.
787 Outside
= Outside
.remove_divs();
788 Outside
= Outside
.complement();
789 const auto &Loc
= getAccessInstruction()
790 ? getAccessInstruction()->getDebugLoc()
792 if (!PollyPreciseInbounds
)
793 Outside
= Outside
.gist_params(Statement
->getDomain().params());
794 Statement
->getParent()->recordAssumption(INBOUNDS
, Outside
, Loc
,
798 void MemoryAccess::buildMemIntrinsicAccessRelation() {
799 assert(isMemoryIntrinsic());
800 assert(Subscripts
.size() == 2 && Sizes
.size() == 1);
802 isl::pw_aff SubscriptPWA
= getPwAff(Subscripts
[0]);
803 isl::map SubscriptMap
= isl::map::from_pw_aff(SubscriptPWA
);
806 if (Subscripts
[1] == nullptr) {
807 LengthMap
= isl::map::universe(SubscriptMap
.get_space());
809 isl::pw_aff LengthPWA
= getPwAff(Subscripts
[1]);
810 LengthMap
= isl::map::from_pw_aff(LengthPWA
);
811 isl::space RangeSpace
= LengthMap
.get_space().range();
812 LengthMap
= LengthMap
.apply_range(isl::map::lex_gt(RangeSpace
));
814 LengthMap
= LengthMap
.lower_bound_si(isl::dim::out
, 0, 0);
815 LengthMap
= LengthMap
.align_params(SubscriptMap
.get_space());
816 SubscriptMap
= SubscriptMap
.align_params(LengthMap
.get_space());
817 LengthMap
= LengthMap
.sum(SubscriptMap
);
819 LengthMap
.set_tuple_id(isl::dim::in
, getStatement()->getDomainId());
822 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize
) {
823 ScalarEvolution
*SE
= Statement
->getParent()->getSE();
825 auto MAI
= MemAccInst(getAccessInstruction());
826 if (isa
<MemIntrinsic
>(MAI
))
829 Value
*Ptr
= MAI
.getPointerOperand();
830 if (!Ptr
|| !SE
->isSCEVable(Ptr
->getType()))
833 auto *PtrSCEV
= SE
->getSCEV(Ptr
);
834 if (isa
<SCEVCouldNotCompute
>(PtrSCEV
))
837 auto *BasePtrSCEV
= SE
->getPointerBase(PtrSCEV
);
838 if (BasePtrSCEV
&& !isa
<SCEVCouldNotCompute
>(BasePtrSCEV
))
839 PtrSCEV
= SE
->getMinusSCEV(PtrSCEV
, BasePtrSCEV
);
841 const ConstantRange
&Range
= SE
->getSignedRange(PtrSCEV
);
842 if (Range
.isFullSet())
845 if (Range
.isWrappedSet() || Range
.isSignWrappedSet())
848 bool isWrapping
= Range
.isSignWrappedSet();
850 unsigned BW
= Range
.getBitWidth();
851 const auto One
= APInt(BW
, 1);
852 const auto LB
= isWrapping
? Range
.getLower() : Range
.getSignedMin();
853 const auto UB
= isWrapping
? (Range
.getUpper() - One
) : Range
.getSignedMax();
855 auto Min
= LB
.sdiv(APInt(BW
, ElementSize
));
856 auto Max
= UB
.sdiv(APInt(BW
, ElementSize
)) + One
;
858 assert(Min
.sle(Max
) && "Minimum expected to be less or equal than max");
860 isl::map Relation
= AccessRelation
;
861 isl::set AccessRange
= Relation
.range();
862 AccessRange
= addRangeBoundsToSet(AccessRange
, ConstantRange(Min
, Max
), 0,
864 AccessRelation
= Relation
.intersect_range(AccessRange
);
867 void MemoryAccess::foldAccessRelation() {
868 if (Sizes
.size() < 2 || isa
<SCEVConstant
>(Sizes
[1]))
871 int Size
= Subscripts
.size();
873 isl::map NewAccessRelation
= AccessRelation
;
875 for (int i
= Size
- 2; i
>= 0; --i
) {
877 isl::map MapOne
, MapTwo
;
878 isl::pw_aff DimSize
= getPwAff(Sizes
[i
+ 1]);
880 isl::space SpaceSize
= DimSize
.get_space();
882 give(isl_space_get_dim_id(SpaceSize
.get(), isl_dim_param
, 0));
884 Space
= AccessRelation
.get_space();
885 Space
= Space
.range().map_from_set();
886 Space
= Space
.align_params(SpaceSize
);
888 int ParamLocation
= Space
.find_dim_by_id(isl::dim::param
, ParamId
);
890 MapOne
= isl::map::universe(Space
);
891 for (int j
= 0; j
< Size
; ++j
)
892 MapOne
= MapOne
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
893 MapOne
= MapOne
.lower_bound_si(isl::dim::in
, i
+ 1, 0);
895 MapTwo
= isl::map::universe(Space
);
896 for (int j
= 0; j
< Size
; ++j
)
897 if (j
< i
|| j
> i
+ 1)
898 MapTwo
= MapTwo
.equate(isl::dim::in
, j
, isl::dim::out
, j
);
900 isl::local_space
LS(Space
);
902 C
= isl::constraint::alloc_equality(LS
);
903 C
= C
.set_constant_si(-1);
904 C
= C
.set_coefficient_si(isl::dim::in
, i
, 1);
905 C
= C
.set_coefficient_si(isl::dim::out
, i
, -1);
906 MapTwo
= MapTwo
.add_constraint(C
);
907 C
= isl::constraint::alloc_equality(LS
);
908 C
= C
.set_coefficient_si(isl::dim::in
, i
+ 1, 1);
909 C
= C
.set_coefficient_si(isl::dim::out
, i
+ 1, -1);
910 C
= C
.set_coefficient_si(isl::dim::param
, ParamLocation
, 1);
911 MapTwo
= MapTwo
.add_constraint(C
);
912 MapTwo
= MapTwo
.upper_bound_si(isl::dim::in
, i
+ 1, -1);
914 MapOne
= MapOne
.unite(MapTwo
);
915 NewAccessRelation
= NewAccessRelation
.apply_range(MapOne
);
918 isl::id BaseAddrId
= getScopArrayInfo()->getBasePtrId();
919 isl::space Space
= Statement
->getDomainSpace();
920 NewAccessRelation
= NewAccessRelation
.set_tuple_id(
921 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
922 NewAccessRelation
= NewAccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
923 NewAccessRelation
= NewAccessRelation
.gist_domain(Statement
->getDomain());
925 // Access dimension folding might in certain cases increase the number of
926 // disjuncts in the memory access, which can possibly complicate the generated
927 // run-time checks and can lead to costly compilation.
928 if (!PollyPreciseFoldAccesses
&&
929 isl_map_n_basic_map(NewAccessRelation
.get()) >
930 isl_map_n_basic_map(AccessRelation
.get())) {
932 AccessRelation
= NewAccessRelation
;
936 /// Check if @p Expr is divisible by @p Size.
937 static bool isDivisible(const SCEV
*Expr
, unsigned Size
, ScalarEvolution
&SE
) {
942 // Only one factor needs to be divisible.
943 if (auto *MulExpr
= dyn_cast
<SCEVMulExpr
>(Expr
)) {
944 for (auto *FactorExpr
: MulExpr
->operands())
945 if (isDivisible(FactorExpr
, Size
, SE
))
950 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
952 if (auto *NAryExpr
= dyn_cast
<SCEVNAryExpr
>(Expr
)) {
953 for (auto *OpExpr
: NAryExpr
->operands())
954 if (!isDivisible(OpExpr
, Size
, SE
))
959 auto *SizeSCEV
= SE
.getConstant(Expr
->getType(), Size
);
960 auto *UDivSCEV
= SE
.getUDivExpr(Expr
, SizeSCEV
);
961 auto *MulSCEV
= SE
.getMulExpr(UDivSCEV
, SizeSCEV
);
962 return MulSCEV
== Expr
;
965 void MemoryAccess::buildAccessRelation(const ScopArrayInfo
*SAI
) {
966 assert(AccessRelation
.is_null() && "AccessRelation already built");
968 // Initialize the invalid domain which describes all iterations for which the
969 // access relation is not modeled correctly.
970 isl::set StmtInvalidDomain
= getStatement()->getInvalidDomain();
971 InvalidDomain
= isl::set::empty(StmtInvalidDomain
.get_space());
973 isl::ctx Ctx
= Id
.get_ctx();
974 isl::id BaseAddrId
= SAI
->getBasePtrId();
976 if (getAccessInstruction() && isa
<MemIntrinsic
>(getAccessInstruction())) {
977 buildMemIntrinsicAccessRelation();
978 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
983 // We overapproximate non-affine accesses with a possible access to the
984 // whole array. For read accesses it does not make a difference, if an
985 // access must or may happen. However, for write accesses it is important to
986 // differentiate between writes that must happen and writes that may happen.
987 if (AccessRelation
.is_null())
988 AccessRelation
= createBasicAccessMap(Statement
);
990 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
994 isl::space Space
= isl::space(Ctx
, 0, Statement
->getNumIterators(), 0);
995 AccessRelation
= isl::map::universe(Space
);
997 for (int i
= 0, Size
= Subscripts
.size(); i
< Size
; ++i
) {
998 isl::pw_aff Affine
= getPwAff(Subscripts
[i
]);
999 isl::map SubscriptMap
= isl::map::from_pw_aff(Affine
);
1000 AccessRelation
= AccessRelation
.flat_range_product(SubscriptMap
);
1003 Space
= Statement
->getDomainSpace();
1004 AccessRelation
= AccessRelation
.set_tuple_id(
1005 isl::dim::in
, Space
.get_tuple_id(isl::dim::set
));
1006 AccessRelation
= AccessRelation
.set_tuple_id(isl::dim::out
, BaseAddrId
);
1008 AccessRelation
= AccessRelation
.gist_domain(Statement
->getDomain());
1011 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, Instruction
*AccessInst
,
1012 AccessType AccType
, Value
*BaseAddress
,
1013 Type
*ElementType
, bool Affine
,
1014 ArrayRef
<const SCEV
*> Subscripts
,
1015 ArrayRef
<const SCEV
*> Sizes
, Value
*AccessValue
,
1017 : Kind(Kind
), AccType(AccType
), Statement(Stmt
), InvalidDomain(nullptr),
1018 BaseAddr(BaseAddress
), ElementType(ElementType
),
1019 Sizes(Sizes
.begin(), Sizes
.end()), AccessInstruction(AccessInst
),
1020 AccessValue(AccessValue
), IsAffine(Affine
),
1021 Subscripts(Subscripts
.begin(), Subscripts
.end()), AccessRelation(nullptr),
1022 NewAccessRelation(nullptr), FAD(nullptr) {
1023 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1024 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1026 std::string IdName
= Stmt
->getBaseName() + Access
;
1027 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1030 MemoryAccess::MemoryAccess(ScopStmt
*Stmt
, AccessType AccType
, isl::map AccRel
)
1031 : Kind(MemoryKind::Array
), AccType(AccType
), Statement(Stmt
),
1032 InvalidDomain(nullptr), AccessRelation(nullptr),
1033 NewAccessRelation(AccRel
), FAD(nullptr) {
1034 isl::id ArrayInfoId
= NewAccessRelation
.get_tuple_id(isl::dim::out
);
1035 auto *SAI
= ScopArrayInfo::getFromId(ArrayInfoId
);
1036 Sizes
.push_back(nullptr);
1037 for (unsigned i
= 1; i
< SAI
->getNumberOfDimensions(); i
++)
1038 Sizes
.push_back(SAI
->getDimensionSize(i
));
1039 ElementType
= SAI
->getElementType();
1040 BaseAddr
= SAI
->getBasePtr();
1041 static const std::string TypeStrings
[] = {"", "_Read", "_Write", "_MayWrite"};
1042 const std::string Access
= TypeStrings
[AccType
] + utostr(Stmt
->size());
1044 std::string IdName
= Stmt
->getBaseName() + Access
;
1045 Id
= isl::id::alloc(Stmt
->getParent()->getIslCtx(), IdName
, this);
1048 MemoryAccess::~MemoryAccess() = default;
1050 void MemoryAccess::realignParams() {
1051 isl::set Ctx
= Statement
->getParent()->getContext();
1052 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1053 AccessRelation
= AccessRelation
.gist_params(Ctx
);
1056 const std::string
MemoryAccess::getReductionOperatorStr() const {
1057 return MemoryAccess::getReductionOperatorStr(getReductionType());
1060 isl::id
MemoryAccess::getId() const { return Id
; }
1062 raw_ostream
&polly::operator<<(raw_ostream
&OS
,
1063 MemoryAccess::ReductionType RT
) {
1064 if (RT
== MemoryAccess::RT_NONE
)
1067 OS
<< MemoryAccess::getReductionOperatorStr(RT
);
1071 void MemoryAccess::setFortranArrayDescriptor(Value
*FAD
) { this->FAD
= FAD
; }
1073 void MemoryAccess::print(raw_ostream
&OS
) const {
1076 OS
.indent(12) << "ReadAccess :=\t";
1079 OS
.indent(12) << "MustWriteAccess :=\t";
1082 OS
.indent(12) << "MayWriteAccess :=\t";
1086 OS
<< "[Reduction Type: " << getReductionType() << "] ";
1089 OS
<< "[Fortran array descriptor: " << FAD
->getName();
1093 OS
<< "[Scalar: " << isScalarKind() << "]\n";
1094 OS
.indent(16) << getOriginalAccessRelationStr() << ";\n";
1095 if (hasNewAccessRelation())
1096 OS
.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1099 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1100 LLVM_DUMP_METHOD
void MemoryAccess::dump() const { print(errs()); }
1103 isl::pw_aff
MemoryAccess::getPwAff(const SCEV
*E
) {
1104 auto *Stmt
= getStatement();
1105 PWACtx PWAC
= Stmt
->getParent()->getPwAff(E
, Stmt
->getEntryBlock());
1106 isl::set StmtDom
= getStatement()->getDomain();
1107 StmtDom
= StmtDom
.reset_tuple_id();
1108 isl::set NewInvalidDom
= StmtDom
.intersect(PWAC
.second
);
1109 InvalidDomain
= InvalidDomain
.unite(NewInvalidDom
);
1113 // Create a map in the size of the provided set domain, that maps from the
1114 // one element of the provided set domain to another element of the provided
1116 // The mapping is limited to all points that are equal in all but the last
1117 // dimension and for which the last dimension of the input is strict smaller
1118 // than the last dimension of the output.
1120 // getEqualAndLarger(set[i0, i1, ..., iX]):
1122 // set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1123 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1125 static isl::map
getEqualAndLarger(isl::space SetDomain
) {
1126 isl::space Space
= SetDomain
.map_from_set();
1127 isl::map Map
= isl::map::universe(Space
);
1128 unsigned lastDimension
= Map
.dim(isl::dim::in
) - 1;
1130 // Set all but the last dimension to be equal for the input and output
1132 // input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1133 // : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1134 for (unsigned i
= 0; i
< lastDimension
; ++i
)
1135 Map
= Map
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
1137 // Set the last dimension of the input to be strict smaller than the
1138 // last dimension of the output.
1140 // input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1141 Map
= Map
.order_lt(isl::dim::in
, lastDimension
, isl::dim::out
, lastDimension
);
1145 isl::set
MemoryAccess::getStride(isl::map Schedule
) const {
1146 isl::map AccessRelation
= getAccessRelation();
1147 isl::space Space
= Schedule
.get_space().range();
1148 isl::map NextScatt
= getEqualAndLarger(Space
);
1150 Schedule
= Schedule
.reverse();
1151 NextScatt
= NextScatt
.lexmin();
1153 NextScatt
= NextScatt
.apply_range(Schedule
);
1154 NextScatt
= NextScatt
.apply_range(AccessRelation
);
1155 NextScatt
= NextScatt
.apply_domain(Schedule
);
1156 NextScatt
= NextScatt
.apply_domain(AccessRelation
);
1158 isl::set Deltas
= NextScatt
.deltas();
1162 bool MemoryAccess::isStrideX(isl::map Schedule
, int StrideWidth
) const {
1163 isl::set Stride
, StrideX
;
1166 Stride
= getStride(Schedule
);
1167 StrideX
= isl::set::universe(Stride
.get_space());
1168 for (unsigned i
= 0; i
< StrideX
.dim(isl::dim::set
) - 1; i
++)
1169 StrideX
= StrideX
.fix_si(isl::dim::set
, i
, 0);
1170 StrideX
= StrideX
.fix_si(isl::dim::set
, StrideX
.dim(isl::dim::set
) - 1,
1172 IsStrideX
= Stride
.is_subset(StrideX
);
1177 bool MemoryAccess::isStrideZero(isl::map Schedule
) const {
1178 return isStrideX(Schedule
, 0);
1181 bool MemoryAccess::isStrideOne(isl::map Schedule
) const {
1182 return isStrideX(Schedule
, 1);
1185 void MemoryAccess::setAccessRelation(isl::map NewAccess
) {
1186 AccessRelation
= NewAccess
;
1189 void MemoryAccess::setNewAccessRelation(isl::map NewAccess
) {
1193 // Check domain space compatibility.
1194 isl::space NewSpace
= NewAccess
.get_space();
1195 isl::space NewDomainSpace
= NewSpace
.domain();
1196 isl::space OriginalDomainSpace
= getStatement()->getDomainSpace();
1197 assert(OriginalDomainSpace
.has_equal_tuples(NewDomainSpace
));
1199 // Reads must be executed unconditionally. Writes might be executed in a
1202 // Check whether there is an access for every statement instance.
1203 isl::set StmtDomain
= getStatement()->getDomain();
1205 StmtDomain
.intersect_params(getStatement()->getParent()->getContext());
1206 isl::set NewDomain
= NewAccess
.domain();
1207 assert(StmtDomain
.is_subset(NewDomain
) &&
1208 "Partial READ accesses not supported");
1211 isl::space NewAccessSpace
= NewAccess
.get_space();
1212 assert(NewAccessSpace
.has_tuple_id(isl::dim::set
) &&
1213 "Must specify the array that is accessed");
1214 isl::id NewArrayId
= NewAccessSpace
.get_tuple_id(isl::dim::set
);
1215 auto *SAI
= static_cast<ScopArrayInfo
*>(NewArrayId
.get_user());
1216 assert(SAI
&& "Must set a ScopArrayInfo");
1218 if (SAI
->isArrayKind() && SAI
->getBasePtrOriginSAI()) {
1219 InvariantEquivClassTy
*EqClass
=
1220 getStatement()->getParent()->lookupInvariantEquivClass(
1223 "Access functions to indirect arrays must have an invariant and "
1224 "hoisted base pointer");
1227 // Check whether access dimensions correspond to number of dimensions of the
1229 auto Dims
= SAI
->getNumberOfDimensions();
1230 assert(NewAccessSpace
.dim(isl::dim::set
) == Dims
&&
1231 "Access dims must match array dims");
1234 NewAccess
= NewAccess
.gist_domain(getStatement()->getDomain());
1235 NewAccessRelation
= NewAccess
;
1238 bool MemoryAccess::isLatestPartialAccess() const {
1239 isl::set StmtDom
= getStatement()->getDomain();
1240 isl::set AccDom
= getLatestAccessRelation().domain();
1242 return isl_set_is_subset(StmtDom
.keep(), AccDom
.keep()) == isl_bool_false
;
1245 //===----------------------------------------------------------------------===//
1247 isl::map
ScopStmt::getSchedule() const {
1248 isl::set Domain
= getDomain();
1249 if (Domain
.is_empty())
1250 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1251 auto Schedule
= getParent()->getSchedule();
1254 Schedule
= Schedule
.intersect_domain(isl::union_set(Domain
));
1255 if (Schedule
.is_empty())
1256 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1257 isl::map M
= M
.from_union_map(Schedule
);
1259 M
= M
.gist_domain(Domain
);
1264 void ScopStmt::restrictDomain(isl::set NewDomain
) {
1265 assert(NewDomain
.is_subset(Domain
) &&
1266 "New domain is not a subset of old domain!");
1270 void ScopStmt::addAccess(MemoryAccess
*Access
, bool Prepend
) {
1271 Instruction
*AccessInst
= Access
->getAccessInstruction();
1273 if (Access
->isArrayKind()) {
1274 MemoryAccessList
&MAL
= InstructionToAccess
[AccessInst
];
1275 MAL
.emplace_front(Access
);
1276 } else if (Access
->isValueKind() && Access
->isWrite()) {
1277 Instruction
*AccessVal
= cast
<Instruction
>(Access
->getAccessValue());
1278 assert(!ValueWrites
.lookup(AccessVal
));
1280 ValueWrites
[AccessVal
] = Access
;
1281 } else if (Access
->isValueKind() && Access
->isRead()) {
1282 Value
*AccessVal
= Access
->getAccessValue();
1283 assert(!ValueReads
.lookup(AccessVal
));
1285 ValueReads
[AccessVal
] = Access
;
1286 } else if (Access
->isAnyPHIKind() && Access
->isWrite()) {
1287 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1288 assert(!PHIWrites
.lookup(PHI
));
1290 PHIWrites
[PHI
] = Access
;
1291 } else if (Access
->isAnyPHIKind() && Access
->isRead()) {
1292 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessValue());
1293 assert(!PHIReads
.lookup(PHI
));
1295 PHIReads
[PHI
] = Access
;
1299 MemAccs
.insert(MemAccs
.begin(), Access
);
1302 MemAccs
.push_back(Access
);
1305 void ScopStmt::realignParams() {
1306 for (MemoryAccess
*MA
: *this)
1307 MA
->realignParams();
1309 isl::set Ctx
= Parent
.getContext();
1310 InvalidDomain
= InvalidDomain
.gist_params(Ctx
);
1311 Domain
= Domain
.gist_params(Ctx
);
1314 /// Add @p BSet to the set @p User if @p BSet is bounded.
1315 static isl_stat
collectBoundedParts(__isl_take isl_basic_set
*BSet
,
1317 isl_set
**BoundedParts
= static_cast<isl_set
**>(User
);
1318 if (isl_basic_set_is_bounded(BSet
))
1319 *BoundedParts
= isl_set_union(*BoundedParts
, isl_set_from_basic_set(BSet
));
1321 isl_basic_set_free(BSet
);
1325 /// Return the bounded parts of @p S.
1326 static __isl_give isl_set
*collectBoundedParts(__isl_take isl_set
*S
) {
1327 isl_set
*BoundedParts
= isl_set_empty(isl_set_get_space(S
));
1328 isl_set_foreach_basic_set(S
, collectBoundedParts
, &BoundedParts
);
1330 return BoundedParts
;
1333 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1335 /// @returns A separation of @p S into first an unbounded then a bounded subset,
1336 /// both with regards to the dimension @p Dim.
1337 static std::pair
<__isl_give isl_set
*, __isl_give isl_set
*>
1338 partitionSetParts(__isl_take isl_set
*S
, unsigned Dim
) {
1339 for (unsigned u
= 0, e
= isl_set_n_dim(S
); u
< e
; u
++)
1340 S
= isl_set_lower_bound_si(S
, isl_dim_set
, u
, 0);
1342 unsigned NumDimsS
= isl_set_n_dim(S
);
1343 isl_set
*OnlyDimS
= isl_set_copy(S
);
1345 // Remove dimensions that are greater than Dim as they are not interesting.
1346 assert(NumDimsS
>= Dim
+ 1);
1348 isl_set_project_out(OnlyDimS
, isl_dim_set
, Dim
+ 1, NumDimsS
- Dim
- 1);
1350 // Create artificial parametric upper bounds for dimensions smaller than Dim
1351 // as we are not interested in them.
1352 OnlyDimS
= isl_set_insert_dims(OnlyDimS
, isl_dim_param
, 0, Dim
);
1353 for (unsigned u
= 0; u
< Dim
; u
++) {
1354 isl_constraint
*C
= isl_inequality_alloc(
1355 isl_local_space_from_space(isl_set_get_space(OnlyDimS
)));
1356 C
= isl_constraint_set_coefficient_si(C
, isl_dim_param
, u
, 1);
1357 C
= isl_constraint_set_coefficient_si(C
, isl_dim_set
, u
, -1);
1358 OnlyDimS
= isl_set_add_constraint(OnlyDimS
, C
);
1361 // Collect all bounded parts of OnlyDimS.
1362 isl_set
*BoundedParts
= collectBoundedParts(OnlyDimS
);
1364 // Create the dimensions greater than Dim again.
1365 BoundedParts
= isl_set_insert_dims(BoundedParts
, isl_dim_set
, Dim
+ 1,
1366 NumDimsS
- Dim
- 1);
1368 // Remove the artificial upper bound parameters again.
1369 BoundedParts
= isl_set_remove_dims(BoundedParts
, isl_dim_param
, 0, Dim
);
1371 isl_set
*UnboundedParts
= isl_set_subtract(S
, isl_set_copy(BoundedParts
));
1372 return std::make_pair(UnboundedParts
, BoundedParts
);
1375 /// Set the dimension Ids from @p From in @p To.
1376 static __isl_give isl_set
*setDimensionIds(__isl_keep isl_set
*From
,
1377 __isl_take isl_set
*To
) {
1378 for (unsigned u
= 0, e
= isl_set_n_dim(From
); u
< e
; u
++) {
1379 isl_id
*DimId
= isl_set_get_dim_id(From
, isl_dim_set
, u
);
1380 To
= isl_set_set_dim_id(To
, isl_dim_set
, u
, DimId
);
1385 /// Create the conditions under which @p L @p Pred @p R is true.
1386 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1387 __isl_take isl_pw_aff
*L
,
1388 __isl_take isl_pw_aff
*R
) {
1390 case ICmpInst::ICMP_EQ
:
1391 return isl_pw_aff_eq_set(L
, R
);
1392 case ICmpInst::ICMP_NE
:
1393 return isl_pw_aff_ne_set(L
, R
);
1394 case ICmpInst::ICMP_SLT
:
1395 return isl_pw_aff_lt_set(L
, R
);
1396 case ICmpInst::ICMP_SLE
:
1397 return isl_pw_aff_le_set(L
, R
);
1398 case ICmpInst::ICMP_SGT
:
1399 return isl_pw_aff_gt_set(L
, R
);
1400 case ICmpInst::ICMP_SGE
:
1401 return isl_pw_aff_ge_set(L
, R
);
1402 case ICmpInst::ICMP_ULT
:
1403 return isl_pw_aff_lt_set(L
, R
);
1404 case ICmpInst::ICMP_UGT
:
1405 return isl_pw_aff_gt_set(L
, R
);
1406 case ICmpInst::ICMP_ULE
:
1407 return isl_pw_aff_le_set(L
, R
);
1408 case ICmpInst::ICMP_UGE
:
1409 return isl_pw_aff_ge_set(L
, R
);
1411 llvm_unreachable("Non integer predicate not supported");
1415 /// Create the conditions under which @p L @p Pred @p R is true.
1417 /// Helper function that will make sure the dimensions of the result have the
1418 /// same isl_id's as the @p Domain.
1419 static __isl_give isl_set
*buildConditionSet(ICmpInst::Predicate Pred
,
1420 __isl_take isl_pw_aff
*L
,
1421 __isl_take isl_pw_aff
*R
,
1422 __isl_keep isl_set
*Domain
) {
1423 isl_set
*ConsequenceCondSet
= buildConditionSet(Pred
, L
, R
);
1424 return setDimensionIds(Domain
, ConsequenceCondSet
);
1427 /// Compute the isl representation for the SCEV @p E in this BB.
1429 /// @param S The Scop in which @p BB resides in.
1430 /// @param BB The BB for which isl representation is to be
1432 /// @param InvalidDomainMap A map of BB to their invalid domains.
1433 /// @param E The SCEV that should be translated.
1434 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
1436 /// Note that this function will also adjust the invalid context accordingly.
1438 __isl_give isl_pw_aff
*
1439 getPwAff(Scop
&S
, BasicBlock
*BB
,
1440 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
, const SCEV
*E
,
1441 bool NonNegative
= false) {
1442 PWACtx PWAC
= S
.getPwAff(E
, BB
, NonNegative
);
1443 InvalidDomainMap
[BB
] = InvalidDomainMap
[BB
].unite(PWAC
.second
);
1444 return PWAC
.first
.take();
1447 /// Build the conditions sets for the switch @p SI in the @p Domain.
1449 /// This will fill @p ConditionSets with the conditions under which control
1450 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1451 /// have as many elements as @p SI has successors.
1452 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, SwitchInst
*SI
, Loop
*L
,
1453 __isl_keep isl_set
*Domain
,
1454 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1455 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1456 Value
*Condition
= getConditionFromTerminator(SI
);
1457 assert(Condition
&& "No condition for switch");
1459 ScalarEvolution
&SE
= *S
.getSE();
1460 isl_pw_aff
*LHS
, *RHS
;
1461 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEVAtScope(Condition
, L
));
1463 unsigned NumSuccessors
= SI
->getNumSuccessors();
1464 ConditionSets
.resize(NumSuccessors
);
1465 for (auto &Case
: SI
->cases()) {
1466 unsigned Idx
= Case
.getSuccessorIndex();
1467 ConstantInt
*CaseValue
= Case
.getCaseValue();
1469 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, SE
.getSCEV(CaseValue
));
1470 isl_set
*CaseConditionSet
=
1471 buildConditionSet(ICmpInst::ICMP_EQ
, isl_pw_aff_copy(LHS
), RHS
, Domain
);
1472 ConditionSets
[Idx
] = isl_set_coalesce(
1473 isl_set_intersect(CaseConditionSet
, isl_set_copy(Domain
)));
1476 assert(ConditionSets
[0] == nullptr && "Default condition set was set");
1477 isl_set
*ConditionSetUnion
= isl_set_copy(ConditionSets
[1]);
1478 for (unsigned u
= 2; u
< NumSuccessors
; u
++)
1480 isl_set_union(ConditionSetUnion
, isl_set_copy(ConditionSets
[u
]));
1481 ConditionSets
[0] = setDimensionIds(
1482 Domain
, isl_set_subtract(isl_set_copy(Domain
), ConditionSetUnion
));
1484 isl_pw_aff_free(LHS
);
1489 /// Build condition sets for unsigned ICmpInst(s).
1490 /// Special handling is required for unsigned operands to ensure that if
1491 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
1492 /// it should wrap around.
1494 /// @param IsStrictUpperBound holds information on the predicate relation
1495 /// between TestVal and UpperBound, i.e,
1496 /// TestVal < UpperBound OR TestVal <= UpperBound
1497 __isl_give isl_set
*
1498 buildUnsignedConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1499 __isl_keep isl_set
*Domain
, const SCEV
*SCEV_TestVal
,
1500 const SCEV
*SCEV_UpperBound
,
1501 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1502 bool IsStrictUpperBound
) {
1503 // Do not take NonNeg assumption on TestVal
1504 // as it might have MSB (Sign bit) set.
1505 isl_pw_aff
*TestVal
= getPwAff(S
, BB
, InvalidDomainMap
, SCEV_TestVal
, false);
1506 // Take NonNeg assumption on UpperBound.
1507 isl_pw_aff
*UpperBound
=
1508 getPwAff(S
, BB
, InvalidDomainMap
, SCEV_UpperBound
, true);
1512 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
1513 isl_pw_aff_get_domain_space(TestVal
))),
1514 isl_pw_aff_copy(TestVal
));
1517 if (IsStrictUpperBound
)
1518 // TestVal < UpperBound
1519 Second
= isl_pw_aff_lt_set(TestVal
, UpperBound
);
1521 // TestVal <= UpperBound
1522 Second
= isl_pw_aff_le_set(TestVal
, UpperBound
);
1524 isl_set
*ConsequenceCondSet
= isl_set_intersect(First
, Second
);
1525 ConsequenceCondSet
= setDimensionIds(Domain
, ConsequenceCondSet
);
1526 return ConsequenceCondSet
;
1529 /// Build the conditions sets for the branch condition @p Condition in
1532 /// This will fill @p ConditionSets with the conditions under which control
1533 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1534 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1535 /// context under which @p Condition is true/false will be returned as the
1536 /// new elements of @p ConditionSets.
1537 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, Value
*Condition
,
1538 TerminatorInst
*TI
, Loop
*L
, __isl_keep isl_set
*Domain
,
1539 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1540 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1541 ScalarEvolution
&SE
= *S
.getSE();
1542 isl_set
*ConsequenceCondSet
= nullptr;
1544 if (auto Load
= dyn_cast
<LoadInst
>(Condition
)) {
1545 const SCEV
*LHSSCEV
= SE
.getSCEVAtScope(Load
, L
);
1546 const SCEV
*RHSSCEV
= SE
.getZero(LHSSCEV
->getType());
1547 bool NonNeg
= false;
1548 isl_pw_aff
*LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LHSSCEV
, NonNeg
);
1549 isl_pw_aff
*RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RHSSCEV
, NonNeg
);
1550 ConsequenceCondSet
=
1551 buildConditionSet(ICmpInst::ICMP_SLE
, LHS
, RHS
, Domain
);
1552 } else if (auto *PHI
= dyn_cast
<PHINode
>(Condition
)) {
1553 auto *Unique
= dyn_cast
<ConstantInt
>(
1554 getUniqueNonErrorValue(PHI
, &S
.getRegion(), *S
.getLI(), *S
.getDT()));
1556 if (Unique
->isZero())
1557 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1559 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1560 } else if (auto *CCond
= dyn_cast
<ConstantInt
>(Condition
)) {
1561 if (CCond
->isZero())
1562 ConsequenceCondSet
= isl_set_empty(isl_set_get_space(Domain
));
1564 ConsequenceCondSet
= isl_set_universe(isl_set_get_space(Domain
));
1565 } else if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Condition
)) {
1566 auto Opcode
= BinOp
->getOpcode();
1567 assert(Opcode
== Instruction::And
|| Opcode
== Instruction::Or
);
1569 bool Valid
= buildConditionSets(S
, BB
, BinOp
->getOperand(0), TI
, L
, Domain
,
1570 InvalidDomainMap
, ConditionSets
) &&
1571 buildConditionSets(S
, BB
, BinOp
->getOperand(1), TI
, L
, Domain
,
1572 InvalidDomainMap
, ConditionSets
);
1574 while (!ConditionSets
.empty())
1575 isl_set_free(ConditionSets
.pop_back_val());
1579 isl_set_free(ConditionSets
.pop_back_val());
1580 isl_set
*ConsCondPart0
= ConditionSets
.pop_back_val();
1581 isl_set_free(ConditionSets
.pop_back_val());
1582 isl_set
*ConsCondPart1
= ConditionSets
.pop_back_val();
1584 if (Opcode
== Instruction::And
)
1585 ConsequenceCondSet
= isl_set_intersect(ConsCondPart0
, ConsCondPart1
);
1587 ConsequenceCondSet
= isl_set_union(ConsCondPart0
, ConsCondPart1
);
1589 auto *ICond
= dyn_cast
<ICmpInst
>(Condition
);
1591 "Condition of exiting branch was neither constant nor ICmp!");
1593 LoopInfo
&LI
= *S
.getLI();
1594 DominatorTree
&DT
= *S
.getDT();
1595 Region
&R
= S
.getRegion();
1597 isl_pw_aff
*LHS
, *RHS
;
1598 // For unsigned comparisons we assumed the signed bit of neither operand
1599 // to be set. The comparison is equal to a signed comparison under this
1601 bool NonNeg
= ICond
->isUnsigned();
1602 const SCEV
*LeftOperand
= SE
.getSCEVAtScope(ICond
->getOperand(0), L
),
1603 *RightOperand
= SE
.getSCEVAtScope(ICond
->getOperand(1), L
);
1605 LeftOperand
= tryForwardThroughPHI(LeftOperand
, R
, SE
, LI
, DT
);
1606 RightOperand
= tryForwardThroughPHI(RightOperand
, R
, SE
, LI
, DT
);
1608 switch (ICond
->getPredicate()) {
1609 case ICmpInst::ICMP_ULT
:
1610 ConsequenceCondSet
=
1611 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1612 RightOperand
, InvalidDomainMap
, true);
1614 case ICmpInst::ICMP_ULE
:
1615 ConsequenceCondSet
=
1616 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, LeftOperand
,
1617 RightOperand
, InvalidDomainMap
, false);
1619 case ICmpInst::ICMP_UGT
:
1620 ConsequenceCondSet
=
1621 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1622 LeftOperand
, InvalidDomainMap
, true);
1624 case ICmpInst::ICMP_UGE
:
1625 ConsequenceCondSet
=
1626 buildUnsignedConditionSets(S
, BB
, Condition
, Domain
, RightOperand
,
1627 LeftOperand
, InvalidDomainMap
, false);
1630 LHS
= getPwAff(S
, BB
, InvalidDomainMap
, LeftOperand
, NonNeg
);
1631 RHS
= getPwAff(S
, BB
, InvalidDomainMap
, RightOperand
, NonNeg
);
1632 ConsequenceCondSet
=
1633 buildConditionSet(ICond
->getPredicate(), LHS
, RHS
, Domain
);
1638 // If no terminator was given we are only looking for parameter constraints
1639 // under which @p Condition is true/false.
1641 ConsequenceCondSet
= isl_set_params(ConsequenceCondSet
);
1642 assert(ConsequenceCondSet
);
1643 ConsequenceCondSet
= isl_set_coalesce(
1644 isl_set_intersect(ConsequenceCondSet
, isl_set_copy(Domain
)));
1646 isl_set
*AlternativeCondSet
= nullptr;
1648 isl_set_n_basic_set(ConsequenceCondSet
) >= MaxDisjunctsInDomain
;
1651 AlternativeCondSet
= isl_set_subtract(isl_set_copy(Domain
),
1652 isl_set_copy(ConsequenceCondSet
));
1654 isl_set_n_basic_set(AlternativeCondSet
) >= MaxDisjunctsInDomain
;
1658 S
.invalidate(COMPLEXITY
, TI
? TI
->getDebugLoc() : DebugLoc(),
1659 TI
? TI
->getParent() : nullptr /* BasicBlock */);
1660 isl_set_free(AlternativeCondSet
);
1661 isl_set_free(ConsequenceCondSet
);
1665 ConditionSets
.push_back(ConsequenceCondSet
);
1666 ConditionSets
.push_back(isl_set_coalesce(AlternativeCondSet
));
1671 /// Build the conditions sets for the terminator @p TI in the @p Domain.
1673 /// This will fill @p ConditionSets with the conditions under which control
1674 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1675 /// have as many elements as @p TI has successors.
1676 bool buildConditionSets(Scop
&S
, BasicBlock
*BB
, TerminatorInst
*TI
, Loop
*L
,
1677 __isl_keep isl_set
*Domain
,
1678 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
,
1679 SmallVectorImpl
<__isl_give isl_set
*> &ConditionSets
) {
1680 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(TI
))
1681 return buildConditionSets(S
, BB
, SI
, L
, Domain
, InvalidDomainMap
,
1684 assert(isa
<BranchInst
>(TI
) && "Terminator was neither branch nor switch.");
1686 if (TI
->getNumSuccessors() == 1) {
1687 ConditionSets
.push_back(isl_set_copy(Domain
));
1691 Value
*Condition
= getConditionFromTerminator(TI
);
1692 assert(Condition
&& "No condition for Terminator");
1694 return buildConditionSets(S
, BB
, Condition
, TI
, L
, Domain
, InvalidDomainMap
,
1698 ScopStmt::ScopStmt(Scop
&parent
, Region
&R
, Loop
*SurroundingLoop
,
1699 std::vector
<Instruction
*> EntryBlockInstructions
)
1700 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), R(&R
),
1701 Build(nullptr), SurroundingLoop(SurroundingLoop
),
1702 Instructions(EntryBlockInstructions
) {
1703 BaseName
= getIslCompatibleName(
1704 "Stmt", R
.getNameStr(), parent
.getNextStmtIdx(), "", UseInstructionNames
);
1707 ScopStmt::ScopStmt(Scop
&parent
, BasicBlock
&bb
, Loop
*SurroundingLoop
,
1708 std::vector
<Instruction
*> Instructions
, int Count
)
1709 : Parent(parent
), InvalidDomain(nullptr), Domain(nullptr), BB(&bb
),
1710 Build(nullptr), SurroundingLoop(SurroundingLoop
),
1711 Instructions(Instructions
) {
1714 S
+= std::to_string(Count
);
1715 BaseName
= getIslCompatibleName("Stmt", &bb
, parent
.getNextStmtIdx(), S
,
1716 UseInstructionNames
);
1719 ScopStmt::ScopStmt(Scop
&parent
, isl::map SourceRel
, isl::map TargetRel
,
1721 : Parent(parent
), InvalidDomain(nullptr), Domain(NewDomain
),
1723 BaseName
= getIslCompatibleName("CopyStmt_", "",
1724 std::to_string(parent
.getCopyStmtsNum()));
1725 isl::id Id
= isl::id::alloc(getIslCtx(), getBaseName(), this);
1726 Domain
= Domain
.set_tuple_id(Id
);
1727 TargetRel
= TargetRel
.set_tuple_id(isl::dim::in
, Id
);
1729 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE
, TargetRel
);
1730 parent
.addAccessFunction(Access
);
1732 SourceRel
= SourceRel
.set_tuple_id(isl::dim::in
, Id
);
1733 Access
= new MemoryAccess(this, MemoryAccess::AccessType::READ
, SourceRel
);
1734 parent
.addAccessFunction(Access
);
1738 ScopStmt::~ScopStmt() = default;
1740 std::string
ScopStmt::getDomainStr() const { return Domain
.to_str(); }
1742 std::string
ScopStmt::getScheduleStr() const {
1743 auto *S
= getSchedule().release();
1746 auto Str
= stringFromIslObj(S
);
1751 void ScopStmt::setInvalidDomain(isl::set ID
) { InvalidDomain
= ID
; }
1753 BasicBlock
*ScopStmt::getEntryBlock() const {
1755 return getBasicBlock();
1756 return getRegion()->getEntry();
1759 unsigned ScopStmt::getNumIterators() const { return NestLoops
.size(); }
1761 const char *ScopStmt::getBaseName() const { return BaseName
.c_str(); }
1763 Loop
*ScopStmt::getLoopForDimension(unsigned Dimension
) const {
1764 return NestLoops
[Dimension
];
1767 isl::ctx
ScopStmt::getIslCtx() const { return Parent
.getIslCtx(); }
1769 isl::set
ScopStmt::getDomain() const { return Domain
; }
1771 isl::space
ScopStmt::getDomainSpace() const { return Domain
.get_space(); }
1773 isl::id
ScopStmt::getDomainId() const { return Domain
.get_tuple_id(); }
1775 void ScopStmt::printInstructions(raw_ostream
&OS
) const {
1776 OS
<< "Instructions {\n";
1778 for (Instruction
*Inst
: Instructions
)
1779 OS
.indent(16) << *Inst
<< "\n";
1781 OS
.indent(12) << "}\n";
1784 void ScopStmt::print(raw_ostream
&OS
, bool PrintInstructions
) const {
1785 OS
<< "\t" << getBaseName() << "\n";
1786 OS
.indent(12) << "Domain :=\n";
1789 OS
.indent(16) << getDomainStr() << ";\n";
1791 OS
.indent(16) << "n/a\n";
1793 OS
.indent(12) << "Schedule :=\n";
1796 OS
.indent(16) << getScheduleStr() << ";\n";
1798 OS
.indent(16) << "n/a\n";
1800 for (MemoryAccess
*Access
: MemAccs
)
1803 if (PrintInstructions
)
1804 printInstructions(OS
.indent(12));
1807 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1808 LLVM_DUMP_METHOD
void ScopStmt::dump() const { print(dbgs(), true); }
1811 void ScopStmt::removeAccessData(MemoryAccess
*MA
) {
1812 if (MA
->isRead() && MA
->isOriginalValueKind()) {
1813 bool Found
= ValueReads
.erase(MA
->getAccessValue());
1815 assert(Found
&& "Expected access data not found");
1817 if (MA
->isWrite() && MA
->isOriginalValueKind()) {
1818 bool Found
= ValueWrites
.erase(cast
<Instruction
>(MA
->getAccessValue()));
1820 assert(Found
&& "Expected access data not found");
1822 if (MA
->isWrite() && MA
->isOriginalAnyPHIKind()) {
1823 bool Found
= PHIWrites
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1825 assert(Found
&& "Expected access data not found");
1827 if (MA
->isRead() && MA
->isOriginalAnyPHIKind()) {
1828 bool Found
= PHIReads
.erase(cast
<PHINode
>(MA
->getAccessInstruction()));
1830 assert(Found
&& "Expected access data not found");
1834 void ScopStmt::removeMemoryAccess(MemoryAccess
*MA
) {
1835 // Remove the memory accesses from this statement together with all scalar
1836 // accesses that were caused by it. MemoryKind::Value READs have no access
1837 // instruction, hence would not be removed by this function. However, it is
1838 // only used for invariant LoadInst accesses, its arguments are always affine,
1839 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1840 // accesses to be removed.
1841 auto Predicate
= [&](MemoryAccess
*Acc
) {
1842 return Acc
->getAccessInstruction() == MA
->getAccessInstruction();
1844 for (auto *MA
: MemAccs
) {
1845 if (Predicate(MA
)) {
1846 removeAccessData(MA
);
1847 Parent
.removeAccessData(MA
);
1850 MemAccs
.erase(std::remove_if(MemAccs
.begin(), MemAccs
.end(), Predicate
),
1852 InstructionToAccess
.erase(MA
->getAccessInstruction());
1855 void ScopStmt::removeSingleMemoryAccess(MemoryAccess
*MA
) {
1856 auto MAIt
= std::find(MemAccs
.begin(), MemAccs
.end(), MA
);
1857 assert(MAIt
!= MemAccs
.end());
1858 MemAccs
.erase(MAIt
);
1860 removeAccessData(MA
);
1861 Parent
.removeAccessData(MA
);
1863 auto It
= InstructionToAccess
.find(MA
->getAccessInstruction());
1864 if (It
!= InstructionToAccess
.end()) {
1865 It
->second
.remove(MA
);
1866 if (It
->second
.empty())
1867 InstructionToAccess
.erase(MA
->getAccessInstruction());
1871 MemoryAccess
*ScopStmt::ensureValueRead(Value
*V
) {
1872 MemoryAccess
*Access
= lookupInputAccessOf(V
);
1876 ScopArrayInfo
*SAI
=
1877 Parent
.getOrCreateScopArrayInfo(V
, V
->getType(), {}, MemoryKind::Value
);
1878 Access
= new MemoryAccess(this, nullptr, MemoryAccess::READ
, V
, V
->getType(),
1879 true, {}, {}, V
, MemoryKind::Value
);
1880 Parent
.addAccessFunction(Access
);
1881 Access
->buildAccessRelation(SAI
);
1883 Parent
.addAccessData(Access
);
1887 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const ScopStmt
&S
) {
1888 S
.print(OS
, PollyPrintInstructions
);
1892 //===----------------------------------------------------------------------===//
/// Scop class implementation
1895 void Scop::setContext(isl::set NewContext
) {
1896 Context
= NewContext
.align_params(Context
.get_space());
1901 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1902 struct SCEVSensitiveParameterRewriter
1903 : public SCEVRewriteVisitor
<SCEVSensitiveParameterRewriter
> {
1904 const ValueToValueMap
&VMap
;
1907 SCEVSensitiveParameterRewriter(const ValueToValueMap
&VMap
,
1908 ScalarEvolution
&SE
)
1909 : SCEVRewriteVisitor(SE
), VMap(VMap
) {}
1911 static const SCEV
*rewrite(const SCEV
*E
, ScalarEvolution
&SE
,
1912 const ValueToValueMap
&VMap
) {
1913 SCEVSensitiveParameterRewriter
SSPR(VMap
, SE
);
1914 return SSPR
.visit(E
);
1917 const SCEV
*visitAddRecExpr(const SCEVAddRecExpr
*E
) {
1918 auto *Start
= visit(E
->getStart());
1919 auto *AddRec
= SE
.getAddRecExpr(SE
.getConstant(E
->getType(), 0),
1920 visit(E
->getStepRecurrence(SE
)),
1921 E
->getLoop(), SCEV::FlagAnyWrap
);
1922 return SE
.getAddExpr(Start
, AddRec
);
1925 const SCEV
*visitUnknown(const SCEVUnknown
*E
) {
1926 if (auto *NewValue
= VMap
.lookup(E
->getValue()))
1927 return SE
.getUnknown(NewValue
);
1932 /// Check whether we should remap a SCEV expression.
1933 struct SCEVFindInsideScop
: public SCEVTraversal
<SCEVFindInsideScop
> {
1934 const ValueToValueMap
&VMap
;
1935 bool FoundInside
= false;
1939 SCEVFindInsideScop(const ValueToValueMap
&VMap
, ScalarEvolution
&SE
,
1941 : SCEVTraversal(*this), VMap(VMap
), S(S
) {}
1943 static bool hasVariant(const SCEV
*E
, ScalarEvolution
&SE
,
1944 const ValueToValueMap
&VMap
, const Scop
*S
) {
1945 SCEVFindInsideScop
SFIS(VMap
, SE
, S
);
1947 return SFIS
.FoundInside
;
1950 bool follow(const SCEV
*E
) {
1951 if (auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(E
)) {
1952 FoundInside
|= S
->getRegion().contains(AddRec
->getLoop());
1953 } else if (auto *Unknown
= dyn_cast
<SCEVUnknown
>(E
)) {
1954 if (Instruction
*I
= dyn_cast
<Instruction
>(Unknown
->getValue()))
1955 FoundInside
|= S
->getRegion().contains(I
) && !VMap
.count(I
);
1957 return !FoundInside
;
1960 bool isDone() { return FoundInside
; }
1963 } // end anonymous namespace
1965 const SCEV
*Scop::getRepresentingInvariantLoadSCEV(const SCEV
*E
) const {
1966 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
1967 // doesn't like addition between an AddRec and an expression that
1968 // doesn't have a dominance relationship with it.)
1969 if (SCEVFindInsideScop::hasVariant(E
, *SE
, InvEquivClassVMap
, this))
1973 return SCEVSensitiveParameterRewriter::rewrite(E
, *SE
, InvEquivClassVMap
);
1976 // This table of function names is used to translate parameter names in more
1977 // human-readable names. This makes it easier to interpret Polly analysis
1979 StringMap
<std::string
> KnownNames
= {
1980 {"_Z13get_global_idj", "global_id"},
1981 {"_Z12get_local_idj", "local_id"},
1982 {"_Z15get_global_sizej", "global_size"},
1983 {"_Z14get_local_sizej", "local_size"},
1984 {"_Z12get_work_dimv", "work_dim"},
1985 {"_Z17get_global_offsetj", "global_offset"},
1986 {"_Z12get_group_idj", "group_id"},
1987 {"_Z14get_num_groupsj", "num_groups"},
1990 static std::string
getCallParamName(CallInst
*Call
) {
1992 raw_string_ostream
OS(Result
);
1993 std::string Name
= Call
->getCalledFunction()->getName();
1995 auto Iterator
= KnownNames
.find(Name
);
1996 if (Iterator
!= KnownNames
.end())
1997 Name
= "__" + Iterator
->getValue();
1999 for (auto &Operand
: Call
->arg_operands()) {
2000 ConstantInt
*Op
= cast
<ConstantInt
>(&Operand
);
2001 OS
<< "_" << Op
->getValue();
2007 void Scop::createParameterId(const SCEV
*Parameter
) {
2008 assert(Parameters
.count(Parameter
));
2009 assert(!ParameterIds
.count(Parameter
));
2011 std::string ParameterName
= "p_" + std::to_string(getNumParams() - 1);
2013 if (const SCEVUnknown
*ValueParameter
= dyn_cast
<SCEVUnknown
>(Parameter
)) {
2014 Value
*Val
= ValueParameter
->getValue();
2015 CallInst
*Call
= dyn_cast
<CallInst
>(Val
);
2017 if (Call
&& isConstCall(Call
)) {
2018 ParameterName
= getCallParamName(Call
);
2019 } else if (UseInstructionNames
) {
2020 // If this parameter references a specific Value and this value has a name
2021 // we use this name as it is likely to be unique and more useful than just
2024 ParameterName
= Val
->getName();
2025 else if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Val
)) {
2026 auto *LoadOrigin
= LI
->getPointerOperand()->stripInBoundsOffsets();
2027 if (LoadOrigin
->hasName()) {
2028 ParameterName
+= "_loaded_from_";
2030 LI
->getPointerOperand()->stripInBoundsOffsets()->getName();
2035 ParameterName
= getIslCompatibleName("", ParameterName
, "");
2038 isl::id Id
= isl::id::alloc(getIslCtx(), ParameterName
,
2039 const_cast<void *>((const void *)Parameter
));
2040 ParameterIds
[Parameter
] = Id
;
2043 void Scop::addParams(const ParameterSetTy
&NewParameters
) {
2044 for (const SCEV
*Parameter
: NewParameters
) {
2045 // Normalize the SCEV to get the representing element for an invariant load.
2046 Parameter
= extractConstantFactor(Parameter
, *SE
).second
;
2047 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2049 if (Parameters
.insert(Parameter
))
2050 createParameterId(Parameter
);
2054 isl::id
Scop::getIdForParam(const SCEV
*Parameter
) const {
2055 // Normalize the SCEV to get the representing element for an invariant load.
2056 Parameter
= getRepresentingInvariantLoadSCEV(Parameter
);
2057 return ParameterIds
.lookup(Parameter
);
2060 isl::set
Scop::addNonEmptyDomainConstraints(isl::set C
) const {
2061 isl_set
*DomainContext
= isl_union_set_params(getDomains().release());
2062 return isl::manage(isl_set_intersect_params(C
.release(), DomainContext
));
2065 bool Scop::isDominatedBy(const DominatorTree
&DT
, BasicBlock
*BB
) const {
2066 return DT
.dominates(BB
, getEntry());
2069 void Scop::addUserAssumptions(
2070 AssumptionCache
&AC
, DominatorTree
&DT
, LoopInfo
&LI
,
2071 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2072 for (auto &Assumption
: AC
.assumptions()) {
2073 auto *CI
= dyn_cast_or_null
<CallInst
>(Assumption
);
2074 if (!CI
|| CI
->getNumArgOperands() != 1)
2077 bool InScop
= contains(CI
);
2078 if (!InScop
&& !isDominatedBy(DT
, CI
->getParent()))
2081 auto *L
= LI
.getLoopFor(CI
->getParent());
2082 auto *Val
= CI
->getArgOperand(0);
2083 ParameterSetTy DetectedParams
;
2084 if (!isAffineConstraint(Val
, &R
, L
, *SE
, DetectedParams
)) {
2086 OptimizationRemarkAnalysis(DEBUG_TYPE
, "IgnoreUserAssumption", CI
)
2087 << "Non-affine user assumption ignored.");
2091 // Collect all newly introduced parameters.
2092 ParameterSetTy NewParams
;
2093 for (auto *Param
: DetectedParams
) {
2094 Param
= extractConstantFactor(Param
, *SE
).second
;
2095 Param
= getRepresentingInvariantLoadSCEV(Param
);
2096 if (Parameters
.count(Param
))
2098 NewParams
.insert(Param
);
2101 SmallVector
<isl_set
*, 2> ConditionSets
;
2102 auto *TI
= InScop
? CI
->getParent()->getTerminator() : nullptr;
2103 BasicBlock
*BB
= InScop
? CI
->getParent() : getRegion().getEntry();
2104 auto *Dom
= InScop
? DomainMap
[BB
].copy() : Context
.copy();
2105 assert(Dom
&& "Cannot propagate a nullptr.");
2106 bool Valid
= buildConditionSets(*this, BB
, Val
, TI
, L
, Dom
,
2107 InvalidDomainMap
, ConditionSets
);
2113 isl_set
*AssumptionCtx
= nullptr;
2115 AssumptionCtx
= isl_set_complement(isl_set_params(ConditionSets
[1]));
2116 isl_set_free(ConditionSets
[0]);
2118 AssumptionCtx
= isl_set_complement(ConditionSets
[1]);
2119 AssumptionCtx
= isl_set_intersect(AssumptionCtx
, ConditionSets
[0]);
2122 // Project out newly introduced parameters as they are not otherwise useful.
2123 if (!NewParams
.empty()) {
2124 for (unsigned u
= 0; u
< isl_set_n_param(AssumptionCtx
); u
++) {
2125 auto *Id
= isl_set_get_dim_id(AssumptionCtx
, isl_dim_param
, u
);
2126 auto *Param
= static_cast<const SCEV
*>(isl_id_get_user(Id
));
2129 if (!NewParams
.count(Param
))
2133 isl_set_project_out(AssumptionCtx
, isl_dim_param
, u
--, 1);
2136 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "UserAssumption", CI
)
2137 << "Use user assumption: " << stringFromIslObj(AssumptionCtx
));
2138 Context
= Context
.intersect(isl::manage(AssumptionCtx
));
2142 void Scop::addUserContext() {
2143 if (UserContextStr
.empty())
2146 isl_set
*UserContext
=
2147 isl_set_read_from_str(getIslCtx().get(), UserContextStr
.c_str());
2148 isl_space
*Space
= getParamSpace().release();
2149 if (isl_space_dim(Space
, isl_dim_param
) !=
2150 isl_set_dim(UserContext
, isl_dim_param
)) {
2151 auto SpaceStr
= isl_space_to_str(Space
);
2152 errs() << "Error: the context provided in -polly-context has not the same "
2153 << "number of dimensions than the computed context. Due to this "
2154 << "mismatch, the -polly-context option is ignored. Please provide "
2155 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2157 isl_set_free(UserContext
);
2158 isl_space_free(Space
);
2162 for (unsigned i
= 0; i
< isl_space_dim(Space
, isl_dim_param
); i
++) {
2163 std::string NameContext
= Context
.get_dim_name(isl::dim::param
, i
);
2164 std::string NameUserContext
=
2165 isl_set_get_dim_name(UserContext
, isl_dim_param
, i
);
2167 if (NameContext
!= NameUserContext
) {
2168 auto SpaceStr
= isl_space_to_str(Space
);
2169 errs() << "Error: the name of dimension " << i
2170 << " provided in -polly-context "
2171 << "is '" << NameUserContext
<< "', but the name in the computed "
2172 << "context is '" << NameContext
2173 << "'. Due to this name mismatch, "
2174 << "the -polly-context option is ignored. Please provide "
2175 << "the context in the parameter space: " << SpaceStr
<< ".\n";
2177 isl_set_free(UserContext
);
2178 isl_space_free(Space
);
2183 isl_set_set_dim_id(UserContext
, isl_dim_param
, i
,
2184 isl_space_get_dim_id(Space
, isl_dim_param
, i
));
2187 Context
= Context
.intersect(isl::manage(UserContext
));
2188 isl_space_free(Space
);
2191 void Scop::buildInvariantEquivalenceClasses() {
2192 DenseMap
<std::pair
<const SCEV
*, Type
*>, LoadInst
*> EquivClasses
;
2194 const InvariantLoadsSetTy
&RIL
= getRequiredInvariantLoads();
2195 for (LoadInst
*LInst
: RIL
) {
2196 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
2198 Type
*Ty
= LInst
->getType();
2199 LoadInst
*&ClassRep
= EquivClasses
[std::make_pair(PointerSCEV
, Ty
)];
2201 InvEquivClassVMap
[LInst
] = ClassRep
;
2206 InvariantEquivClasses
.emplace_back(
2207 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList(), nullptr, Ty
});
2211 void Scop::buildContext() {
2212 isl::space Space
= isl::space::params_alloc(getIslCtx(), 0);
2213 Context
= isl::set::universe(Space
);
2214 InvalidContext
= isl::set::empty(Space
);
2215 AssumedContext
= isl::set::universe(Space
);
2218 void Scop::addParameterBounds() {
2220 for (auto *Parameter
: Parameters
) {
2221 ConstantRange SRange
= SE
->getSignedRange(Parameter
);
2222 Context
= addRangeBoundsToSet(Context
, SRange
, PDim
++, isl::dim::param
);
2226 static std::vector
<isl::id
> getFortranArrayIds(Scop::array_range Arrays
) {
2227 std::vector
<isl::id
> OutermostSizeIds
;
2228 for (auto Array
: Arrays
) {
2229 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2230 // for its outermost dimension. Fortran arrays will have this since the
2231 // outermost dimension size can be picked up from their runtime description.
2232 // TODO: actually need to check if it has a FAD, but for now this works.
2233 if (Array
->getNumberOfDimensions() > 0) {
2234 isl::pw_aff PwAff
= Array
->getDimensionSizePw(0);
2239 isl::manage(isl_pw_aff_get_dim_id(PwAff
.get(), isl_dim_param
, 0));
2240 assert(!Id
.is_null() &&
2241 "Invalid Id for PwAff expression in Fortran array");
2243 OutermostSizeIds
.push_back(Id
);
2246 return OutermostSizeIds
;
2249 // The FORTRAN array size parameters are known to be non-negative.
2250 static isl::set
boundFortranArrayParams(isl::set Context
,
2251 Scop::array_range Arrays
) {
2252 std::vector
<isl::id
> OutermostSizeIds
;
2253 OutermostSizeIds
= getFortranArrayIds(Arrays
);
2255 for (isl::id Id
: OutermostSizeIds
) {
2256 int dim
= Context
.find_dim_by_id(isl::dim::param
, Id
);
2257 Context
= Context
.lower_bound_si(isl::dim::param
, dim
, 0);
2263 void Scop::realignParams() {
2264 if (PollyIgnoreParamBounds
)
2267 // Add all parameters into a common model.
2268 isl::space Space
= getFullParamSpace();
2270 // Align the parameters of all data structures to the model.
2271 Context
= Context
.align_params(Space
);
2273 // Bound the size of the fortran array dimensions.
2274 Context
= boundFortranArrayParams(Context
, arrays());
2276 // As all parameters are known add bounds to them.
2277 addParameterBounds();
2279 for (ScopStmt
&Stmt
: *this)
2280 Stmt
.realignParams();
2281 // Simplify the schedule according to the context too.
2282 Schedule
= Schedule
.gist_domain_params(getContext());
2285 static isl::set
simplifyAssumptionContext(isl::set AssumptionContext
,
2287 // If we have modeled all blocks in the SCoP that have side effects we can
2288 // simplify the context with the constraints that are needed for anything to
2289 // be executed at all. However, if we have error blocks in the SCoP we already
2290 // assumed some parameter combinations cannot occur and removed them from the
2291 // domains, thus we cannot use the remaining domain to simplify the
2293 if (!S
.hasErrorBlock()) {
2294 auto DomainParameters
= S
.getDomains().params();
2295 AssumptionContext
= AssumptionContext
.gist_params(DomainParameters
);
2298 AssumptionContext
= AssumptionContext
.gist_params(S
.getContext());
2299 return AssumptionContext
;
2302 void Scop::simplifyContexts() {
2303 // The parameter constraints of the iteration domains give us a set of
2304 // constraints that need to hold for all cases where at least a single
2305 // statement iteration is executed in the whole scop. We now simplify the
2306 // assumed context under the assumption that such constraints hold and at
2307 // least a single statement iteration is executed. For cases where no
2308 // statement instances are executed, the assumptions we have taken about
2309 // the executed code do not matter and can be changed.
2311 // WARNING: This only holds if the assumptions we have taken do not reduce
2312 // the set of statement instances that are executed. Otherwise we
2313 // may run into a case where the iteration domains suggest that
2314 // for a certain set of parameter constraints no code is executed,
2315 // but in the original program some computation would have been
2316 // performed. In such a case, modifying the run-time conditions and
2317 // possibly influencing the run-time check may cause certain scops
2318 // to not be executed.
2322 // When delinearizing the following code:
2324 // for (long i = 0; i < 100; i++)
2325 // for (long j = 0; j < m; j++)
2328 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2329 // otherwise we would access out of bound data. Now, knowing that code is
2330 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2331 AssumedContext
= simplifyAssumptionContext(AssumedContext
, *this);
2332 InvalidContext
= InvalidContext
.align_params(getParamSpace());
2335 /// Add the minimal/maximal access in @p Set to @p User.
2337 buildMinMaxAccess(isl::set Set
, Scop::MinMaxVectorTy
&MinMaxAccesses
, Scop
&S
) {
2338 isl::pw_multi_aff MinPMA
, MaxPMA
;
2339 isl::pw_aff LastDimAff
;
2342 isl::ctx Ctx
= Set
.get_ctx();
2344 Set
= Set
.remove_divs();
2346 if (isl_set_n_basic_set(Set
.get()) >= MaxDisjunctsInDomain
)
2347 return isl::stat::error
;
2349 // Restrict the number of parameters involved in the access as the lexmin/
2350 // lexmax computation will take too long if this number is high.
2352 // Experiments with a simple test case using an i7 4800MQ:
2354 // #Parameters involved | Time (in sec)
2363 if (isl_set_n_param(Set
.get()) > RunTimeChecksMaxParameters
) {
2364 unsigned InvolvedParams
= 0;
2365 for (unsigned u
= 0, e
= isl_set_n_param(Set
.get()); u
< e
; u
++)
2366 if (Set
.involves_dims(isl::dim::param
, u
, 1))
2369 if (InvolvedParams
> RunTimeChecksMaxParameters
)
2370 return isl::stat::error
;
2373 if (isl_set_n_basic_set(Set
.get()) > RunTimeChecksMaxAccessDisjuncts
)
2374 return isl::stat::error
;
2376 MinPMA
= Set
.lexmin_pw_multi_aff();
2377 MaxPMA
= Set
.lexmax_pw_multi_aff();
2379 if (isl_ctx_last_error(Ctx
.get()) == isl_error_quota
)
2380 return isl::stat::error
;
2382 MinPMA
= MinPMA
.coalesce();
2383 MaxPMA
= MaxPMA
.coalesce();
2385 // Adjust the last dimension of the maximal access by one as we want to
2386 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2387 // we test during code generation might now point after the end of the
2388 // allocated array but we will never dereference it anyway.
2389 assert(MaxPMA
.dim(isl::dim::out
) && "Assumed at least one output dimension");
2390 Pos
= MaxPMA
.dim(isl::dim::out
) - 1;
2391 LastDimAff
= MaxPMA
.get_pw_aff(Pos
);
2392 OneAff
= isl::aff(isl::local_space(LastDimAff
.get_domain_space()));
2393 OneAff
= OneAff
.add_constant_si(1);
2394 LastDimAff
= LastDimAff
.add(OneAff
);
2395 MaxPMA
= MaxPMA
.set_pw_aff(Pos
, LastDimAff
);
2397 MinMaxAccesses
.push_back(std::make_pair(MinPMA
, MaxPMA
));
2399 return isl::stat::ok
;
2402 static __isl_give isl_set
*getAccessDomain(MemoryAccess
*MA
) {
2403 isl_set
*Domain
= MA
->getStatement()->getDomain().release();
2404 Domain
= isl_set_project_out(Domain
, isl_dim_set
, 0, isl_set_n_dim(Domain
));
2405 return isl_set_reset_tuple_id(Domain
);
2408 /// Wrapper function to calculate minimal/maximal accesses to each array.
2409 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup
, Scop
&S
,
2410 Scop::MinMaxVectorTy
&MinMaxAccesses
) {
2411 MinMaxAccesses
.reserve(AliasGroup
.size());
2413 isl::union_set Domains
= S
.getDomains();
2414 isl::union_map Accesses
= isl::union_map::empty(S
.getParamSpace());
2416 for (MemoryAccess
*MA
: AliasGroup
)
2417 Accesses
= Accesses
.add_map(give(MA
->getAccessRelation().release()));
2419 Accesses
= Accesses
.intersect_domain(Domains
);
2420 isl::union_set Locations
= Accesses
.range();
2421 Locations
= Locations
.coalesce();
2422 Locations
= Locations
.detect_equalities();
2424 auto Lambda
= [&MinMaxAccesses
, &S
](isl::set Set
) -> isl::stat
{
2425 return buildMinMaxAccess(Set
, MinMaxAccesses
, S
);
2427 return Locations
.foreach_set(Lambda
) == isl::stat::ok
;
2430 /// Helper to treat non-affine regions and basic blocks the same.
2434 /// Return the block that is the representing block for @p RN.
2435 static inline BasicBlock
*getRegionNodeBasicBlock(RegionNode
*RN
) {
2436 return RN
->isSubRegion() ? RN
->getNodeAs
<Region
>()->getEntry()
2437 : RN
->getNodeAs
<BasicBlock
>();
2440 /// Return the @p idx'th block that is executed after @p RN.
2441 static inline BasicBlock
*
2442 getRegionNodeSuccessor(RegionNode
*RN
, TerminatorInst
*TI
, unsigned idx
) {
2443 if (RN
->isSubRegion()) {
2445 return RN
->getNodeAs
<Region
>()->getExit();
2447 return TI
->getSuccessor(idx
);
2450 /// Return the smallest loop surrounding @p RN.
2451 static inline Loop
*getRegionNodeLoop(RegionNode
*RN
, LoopInfo
&LI
) {
2452 if (!RN
->isSubRegion()) {
2453 BasicBlock
*BB
= RN
->getNodeAs
<BasicBlock
>();
2454 Loop
*L
= LI
.getLoopFor(BB
);
2456 // Unreachable statements are not considered to belong to a LLVM loop, as
2457 // they are not part of an actual loop in the control flow graph.
2458 // Nevertheless, we handle certain unreachable statements that are common
2459 // when modeling run-time bounds checks as being part of the loop to be
2460 // able to model them and to later eliminate the run-time bounds checks.
2462 // Specifically, for basic blocks that terminate in an unreachable and
2463 // where the immediate predecessor is part of a loop, we assume these
2464 // basic blocks belong to the loop the predecessor belongs to. This
2465 // allows us to model the following code.
2467 // for (i = 0; i < N; i++) {
2469 // abort(); <- this abort might be translated to an
2474 if (!L
&& isa
<UnreachableInst
>(BB
->getTerminator()) && BB
->getPrevNode())
2475 L
= LI
.getLoopFor(BB
->getPrevNode());
2479 Region
*NonAffineSubRegion
= RN
->getNodeAs
<Region
>();
2480 Loop
*L
= LI
.getLoopFor(NonAffineSubRegion
->getEntry());
2481 while (L
&& NonAffineSubRegion
->contains(L
))
2482 L
= L
->getParentLoop();
2486 /// Get the number of blocks in @p L.
2488 /// The number of blocks in a loop are the number of basic blocks actually
2489 /// belonging to the loop, as well as all single basic blocks that the loop
2490 /// exits to and which terminate in an unreachable instruction. We do not
2491 /// allow such basic blocks in the exit of a scop, hence they belong to the
2492 /// scop and represent run-time conditions which we want to model and
2493 /// subsequently speculate away.
2495 /// @see getRegionNodeLoop for additional details.
2496 unsigned getNumBlocksInLoop(Loop
*L
) {
2497 unsigned NumBlocks
= L
->getNumBlocks();
2498 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
2499 L
->getExitBlocks(ExitBlocks
);
2501 for (auto ExitBlock
: ExitBlocks
) {
2502 if (isa
<UnreachableInst
>(ExitBlock
->getTerminator()))
2508 static inline unsigned getNumBlocksInRegionNode(RegionNode
*RN
) {
2509 if (!RN
->isSubRegion())
2512 Region
*R
= RN
->getNodeAs
<Region
>();
2513 return std::distance(R
->block_begin(), R
->block_end());
2516 static bool containsErrorBlock(RegionNode
*RN
, const Region
&R
, LoopInfo
&LI
,
2517 const DominatorTree
&DT
) {
2518 if (!RN
->isSubRegion())
2519 return isErrorBlock(*RN
->getNodeAs
<BasicBlock
>(), R
, LI
, DT
);
2520 for (BasicBlock
*BB
: RN
->getNodeAs
<Region
>()->blocks())
2521 if (isErrorBlock(*BB
, R
, LI
, DT
))
2528 static inline __isl_give isl_set
*addDomainDimId(__isl_take isl_set
*Domain
,
2529 unsigned Dim
, Loop
*L
) {
2530 Domain
= isl_set_lower_bound_si(Domain
, isl_dim_set
, Dim
, -1);
2532 isl_id_alloc(isl_set_get_ctx(Domain
), nullptr, static_cast<void *>(L
));
2533 return isl_set_set_dim_id(Domain
, isl_dim_set
, Dim
, DimId
);
2536 isl::set
Scop::getDomainConditions(const ScopStmt
*Stmt
) const {
2537 return getDomainConditions(Stmt
->getEntryBlock());
2540 isl::set
Scop::getDomainConditions(BasicBlock
*BB
) const {
2541 auto DIt
= DomainMap
.find(BB
);
2542 if (DIt
!= DomainMap
.end())
2543 return DIt
->getSecond();
2545 auto &RI
= *R
.getRegionInfo();
2546 auto *BBR
= RI
.getRegionFor(BB
);
2547 while (BBR
->getEntry() == BB
)
2548 BBR
= BBR
->getParent();
2549 return getDomainConditions(BBR
->getEntry());
2552 bool Scop::buildDomains(Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2553 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2554 bool IsOnlyNonAffineRegion
= isNonAffineSubRegion(R
);
2555 auto *EntryBB
= R
->getEntry();
2556 auto *L
= IsOnlyNonAffineRegion
? nullptr : LI
.getLoopFor(EntryBB
);
2557 int LD
= getRelativeLoopDepth(L
);
2558 auto *S
= isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD
+ 1));
2561 S
= addDomainDimId(S
, LD
+ 1, L
);
2562 L
= L
->getParentLoop();
2565 InvalidDomainMap
[EntryBB
] = isl::manage(isl_set_empty(isl_set_get_space(S
)));
2566 DomainMap
[EntryBB
] = isl::manage(S
);
2568 if (IsOnlyNonAffineRegion
)
2569 return !containsErrorBlock(R
->getNode(), *R
, LI
, DT
);
2571 if (!buildDomainsWithBranchConstraints(R
, DT
, LI
, InvalidDomainMap
))
2574 if (!propagateDomainConstraints(R
, DT
, LI
, InvalidDomainMap
))
2577 // Error blocks and blocks dominated by them have been assumed to never be
2578 // executed. Representing them in the Scop does not add any value. In fact,
2579 // it is likely to cause issues during construction of the ScopStmts. The
2580 // contents of error blocks have not been verified to be expressible and
2581 // will cause problems when building up a ScopStmt for them.
2582 // Furthermore, basic blocks dominated by error blocks may reference
2583 // instructions in the error block which, if the error block is not modeled,
2584 // can themselves not be constructed properly. To this end we will replace
2585 // the domains of error blocks and those only reachable via error blocks
2586 // with an empty set. Additionally, we will record for each block under which
2587 // parameter combination it would be reached via an error block in its
2588 // InvalidDomain. This information is needed during load hoisting.
2589 if (!propagateInvalidStmtDomains(R
, DT
, LI
, InvalidDomainMap
))
2595 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2596 /// to be compatible to domains constructed for loop @p NewL.
2598 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2599 /// edge from @p OldL to @p NewL.
2600 static __isl_give isl_set
*adjustDomainDimensions(Scop
&S
,
2601 __isl_take isl_set
*Dom
,
2602 Loop
*OldL
, Loop
*NewL
) {
2603 // If the loops are the same there is nothing to do.
2607 int OldDepth
= S
.getRelativeLoopDepth(OldL
);
2608 int NewDepth
= S
.getRelativeLoopDepth(NewL
);
2609 // If both loops are non-affine loops there is nothing to do.
2610 if (OldDepth
== -1 && NewDepth
== -1)
2613 // Distinguish three cases:
2614 // 1) The depth is the same but the loops are not.
2615 // => One loop was left one was entered.
2616 // 2) The depth increased from OldL to NewL.
2617 // => One loop was entered, none was left.
2618 // 3) The depth decreased from OldL to NewL.
2619 // => Loops were left were difference of the depths defines how many.
2620 if (OldDepth
== NewDepth
) {
2621 assert(OldL
->getParentLoop() == NewL
->getParentLoop());
2622 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NewDepth
, 1);
2623 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2624 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2625 } else if (OldDepth
< NewDepth
) {
2626 assert(OldDepth
+ 1 == NewDepth
);
2627 auto &R
= S
.getRegion();
2629 assert(NewL
->getParentLoop() == OldL
||
2630 ((!OldL
|| !R
.contains(OldL
)) && R
.contains(NewL
)));
2631 Dom
= isl_set_add_dims(Dom
, isl_dim_set
, 1);
2632 Dom
= addDomainDimId(Dom
, NewDepth
, NewL
);
2634 assert(OldDepth
> NewDepth
);
2635 int Diff
= OldDepth
- NewDepth
;
2636 int NumDim
= isl_set_n_dim(Dom
);
2637 assert(NumDim
>= Diff
);
2638 Dom
= isl_set_project_out(Dom
, isl_dim_set
, NumDim
- Diff
, Diff
);
2644 bool Scop::propagateInvalidStmtDomains(
2645 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2646 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2647 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2648 for (auto *RN
: RTraversal
) {
2650 // Recurse for affine subregions but go on for basic blocks and non-affine
2652 if (RN
->isSubRegion()) {
2653 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2654 if (!isNonAffineSubRegion(SubRegion
)) {
2655 propagateInvalidStmtDomains(SubRegion
, DT
, LI
, InvalidDomainMap
);
2660 bool ContainsErrorBlock
= containsErrorBlock(RN
, getRegion(), LI
, DT
);
2661 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2662 isl::set
&Domain
= DomainMap
[BB
];
2663 assert(Domain
&& "Cannot propagate a nullptr");
2665 isl::set InvalidDomain
= InvalidDomainMap
[BB
];
2667 bool IsInvalidBlock
= ContainsErrorBlock
|| Domain
.is_subset(InvalidDomain
);
2669 if (!IsInvalidBlock
) {
2670 InvalidDomain
= InvalidDomain
.intersect(Domain
);
2672 InvalidDomain
= Domain
;
2673 isl::set DomPar
= Domain
.params();
2674 recordAssumption(ERRORBLOCK
, DomPar
, BB
->getTerminator()->getDebugLoc(),
2679 if (InvalidDomain
.is_empty()) {
2680 InvalidDomainMap
[BB
] = InvalidDomain
;
2684 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2685 auto *TI
= BB
->getTerminator();
2686 unsigned NumSuccs
= RN
->isSubRegion() ? 1 : TI
->getNumSuccessors();
2687 for (unsigned u
= 0; u
< NumSuccs
; u
++) {
2688 auto *SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2690 // Skip successors outside the SCoP.
2691 if (!contains(SuccBB
))
2695 if (DT
.dominates(SuccBB
, BB
))
2698 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2700 auto *AdjustedInvalidDomain
= adjustDomainDimensions(
2701 *this, InvalidDomain
.copy(), BBLoop
, SuccBBLoop
);
2703 auto *SuccInvalidDomain
= InvalidDomainMap
[SuccBB
].copy();
2705 isl_set_union(SuccInvalidDomain
, AdjustedInvalidDomain
);
2706 SuccInvalidDomain
= isl_set_coalesce(SuccInvalidDomain
);
2707 unsigned NumConjucts
= isl_set_n_basic_set(SuccInvalidDomain
);
2709 InvalidDomainMap
[SuccBB
] = isl::manage(SuccInvalidDomain
);
2711 // Check if the maximal number of domain disjunctions was reached.
2712 // In case this happens we will bail.
2713 if (NumConjucts
< MaxDisjunctsInDomain
)
2716 InvalidDomainMap
.erase(BB
);
2717 invalidate(COMPLEXITY
, TI
->getDebugLoc(), TI
->getParent());
2721 InvalidDomainMap
[BB
] = InvalidDomain
;
2727 void Scop::propagateDomainConstraintsToRegionExit(
2728 BasicBlock
*BB
, Loop
*BBLoop
,
2729 SmallPtrSetImpl
<BasicBlock
*> &FinishedExitBlocks
, LoopInfo
&LI
,
2730 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2731 // Check if the block @p BB is the entry of a region. If so we propagate it's
2732 // domain to the exit block of the region. Otherwise we are done.
2733 auto *RI
= R
.getRegionInfo();
2734 auto *BBReg
= RI
? RI
->getRegionFor(BB
) : nullptr;
2735 auto *ExitBB
= BBReg
? BBReg
->getExit() : nullptr;
2736 if (!BBReg
|| BBReg
->getEntry() != BB
|| !contains(ExitBB
))
2739 // Do not propagate the domain if there is a loop backedge inside the region
2740 // that would prevent the exit block from being executed.
2742 while (L
&& contains(L
)) {
2743 SmallVector
<BasicBlock
*, 4> LatchBBs
;
2744 BBLoop
->getLoopLatches(LatchBBs
);
2745 for (auto *LatchBB
: LatchBBs
)
2746 if (BB
!= LatchBB
&& BBReg
->contains(LatchBB
))
2748 L
= L
->getParentLoop();
2751 isl::set Domain
= DomainMap
[BB
];
2752 assert(Domain
&& "Cannot propagate a nullptr");
2754 Loop
*ExitBBLoop
= getFirstNonBoxedLoopFor(ExitBB
, LI
, getBoxedLoops());
2756 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2757 // adjust the domain before we can propagate it.
2758 isl::set AdjustedDomain
= isl::manage(
2759 adjustDomainDimensions(*this, Domain
.copy(), BBLoop
, ExitBBLoop
));
2760 isl::set
&ExitDomain
= DomainMap
[ExitBB
];
2762 // If the exit domain is not yet created we set it otherwise we "add" the
2764 ExitDomain
= ExitDomain
? AdjustedDomain
.unite(ExitDomain
) : AdjustedDomain
;
2766 // Initialize the invalid domain.
2767 InvalidDomainMap
[ExitBB
] = ExitDomain
.empty(ExitDomain
.get_space());
2769 FinishedExitBlocks
.insert(ExitBB
);
2772 bool Scop::buildDomainsWithBranchConstraints(
2773 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2774 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2775 // To create the domain for each block in R we iterate over all blocks and
2776 // subregions in R and propagate the conditions under which the current region
2777 // element is executed. To this end we iterate in reverse post order over R as
2778 // it ensures that we first visit all predecessors of a region node (either a
2779 // basic block or a subregion) before we visit the region node itself.
2780 // Initially, only the domain for the SCoP region entry block is set and from
2781 // there we propagate the current domain to all successors, however we add the
2782 // condition that the successor is actually executed next.
2783 // As we are only interested in non-loop carried constraints here we can
2784 // simply skip loop back edges.
2786 SmallPtrSet
<BasicBlock
*, 8> FinishedExitBlocks
;
2787 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2788 for (auto *RN
: RTraversal
) {
2789 // Recurse for affine subregions but go on for basic blocks and non-affine
2791 if (RN
->isSubRegion()) {
2792 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2793 if (!isNonAffineSubRegion(SubRegion
)) {
2794 if (!buildDomainsWithBranchConstraints(SubRegion
, DT
, LI
,
2801 if (containsErrorBlock(RN
, getRegion(), LI
, DT
))
2802 HasErrorBlock
= true;
2804 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2805 TerminatorInst
*TI
= BB
->getTerminator();
2807 if (isa
<UnreachableInst
>(TI
))
2810 isl::set Domain
= DomainMap
.lookup(BB
);
2813 MaxLoopDepth
= std::max(MaxLoopDepth
, isl_set_n_dim(Domain
.get()));
2815 auto *BBLoop
= getRegionNodeLoop(RN
, LI
);
2816 // Propagate the domain from BB directly to blocks that have a superset
2817 // domain, at the moment only region exit nodes of regions that start in BB.
2818 propagateDomainConstraintsToRegionExit(BB
, BBLoop
, FinishedExitBlocks
, LI
,
2821 // If all successors of BB have been set a domain through the propagation
2822 // above we do not need to build condition sets but can just skip this
2823 // block. However, it is important to note that this is a local property
2824 // with regards to the region @p R. To this end FinishedExitBlocks is a
2826 auto IsFinishedRegionExit
= [&FinishedExitBlocks
](BasicBlock
*SuccBB
) {
2827 return FinishedExitBlocks
.count(SuccBB
);
2829 if (std::all_of(succ_begin(BB
), succ_end(BB
), IsFinishedRegionExit
))
2832 // Build the condition sets for the successor nodes of the current region
2833 // node. If it is a non-affine subregion we will always execute the single
2834 // exit node, hence the single entry node domain is the condition set. For
2835 // basic blocks we use the helper function buildConditionSets.
2836 SmallVector
<isl_set
*, 8> ConditionSets
;
2837 if (RN
->isSubRegion())
2838 ConditionSets
.push_back(Domain
.copy());
2839 else if (!buildConditionSets(*this, BB
, TI
, BBLoop
, Domain
.get(),
2840 InvalidDomainMap
, ConditionSets
))
2843 // Now iterate over the successors and set their initial domain based on
2844 // their condition set. We skip back edges here and have to be careful when
2845 // we leave a loop not to keep constraints over a dimension that doesn't
2847 assert(RN
->isSubRegion() || TI
->getNumSuccessors() == ConditionSets
.size());
2848 for (unsigned u
= 0, e
= ConditionSets
.size(); u
< e
; u
++) {
2849 isl::set CondSet
= isl::manage(ConditionSets
[u
]);
2850 BasicBlock
*SuccBB
= getRegionNodeSuccessor(RN
, TI
, u
);
2852 // Skip blocks outside the region.
2853 if (!contains(SuccBB
))
2856 // If we propagate the domain of some block to "SuccBB" we do not have to
2857 // adjust the domain.
2858 if (FinishedExitBlocks
.count(SuccBB
))
2862 if (DT
.dominates(SuccBB
, BB
))
2865 Loop
*SuccBBLoop
= getFirstNonBoxedLoopFor(SuccBB
, LI
, getBoxedLoops());
2867 CondSet
= isl::manage(
2868 adjustDomainDimensions(*this, CondSet
.copy(), BBLoop
, SuccBBLoop
));
2870 // Set the domain for the successor or merge it with an existing domain in
2871 // case there are multiple paths (without loop back edges) to the
2873 isl::set
&SuccDomain
= DomainMap
[SuccBB
];
2876 SuccDomain
= SuccDomain
.unite(CondSet
).coalesce();
2878 // Initialize the invalid domain.
2879 InvalidDomainMap
[SuccBB
] = CondSet
.empty(CondSet
.get_space());
2880 SuccDomain
= CondSet
;
2883 SuccDomain
= SuccDomain
.detect_equalities();
2885 // Check if the maximal number of domain disjunctions was reached.
2886 // In case this happens we will clean up and bail.
2887 if (isl_set_n_basic_set(SuccDomain
.get()) < MaxDisjunctsInDomain
)
2890 invalidate(COMPLEXITY
, DebugLoc());
2891 while (++u
< ConditionSets
.size())
2892 isl_set_free(ConditionSets
[u
]);
2900 isl::set
Scop::getPredecessorDomainConstraints(BasicBlock
*BB
, isl::set Domain
,
2903 // If @p BB is the ScopEntry we are done
2904 if (R
.getEntry() == BB
)
2905 return isl::set::universe(Domain
.get_space());
2907 // The region info of this function.
2908 auto &RI
= *R
.getRegionInfo();
2910 Loop
*BBLoop
= getFirstNonBoxedLoopFor(BB
, LI
, getBoxedLoops());
2912 // A domain to collect all predecessor domains, thus all conditions under
2913 // which the block is executed. To this end we start with the empty domain.
2914 isl::set PredDom
= isl::set::empty(Domain
.get_space());
2916 // Set of regions of which the entry block domain has been propagated to BB.
2917 // all predecessors inside any of the regions can be skipped.
2918 SmallSet
<Region
*, 8> PropagatedRegions
;
2920 for (auto *PredBB
: predecessors(BB
)) {
2922 if (DT
.dominates(BB
, PredBB
))
2925 // If the predecessor is in a region we used for propagation we can skip it.
2926 auto PredBBInRegion
= [PredBB
](Region
*PR
) { return PR
->contains(PredBB
); };
2927 if (std::any_of(PropagatedRegions
.begin(), PropagatedRegions
.end(),
2932 // Check if there is a valid region we can use for propagation, thus look
2933 // for a region that contains the predecessor and has @p BB as exit block.
2934 auto *PredR
= RI
.getRegionFor(PredBB
);
2935 while (PredR
->getExit() != BB
&& !PredR
->contains(BB
))
2938 // If a valid region for propagation was found use the entry of that region
2939 // for propagation, otherwise the PredBB directly.
2940 if (PredR
->getExit() == BB
) {
2941 PredBB
= PredR
->getEntry();
2942 PropagatedRegions
.insert(PredR
);
2945 auto *PredBBDom
= getDomainConditions(PredBB
).release();
2946 Loop
*PredBBLoop
= getFirstNonBoxedLoopFor(PredBB
, LI
, getBoxedLoops());
2948 PredBBDom
= adjustDomainDimensions(*this, PredBBDom
, PredBBLoop
, BBLoop
);
2950 PredDom
= PredDom
.unite(isl::manage(PredBBDom
));
2956 bool Scop::propagateDomainConstraints(
2957 Region
*R
, DominatorTree
&DT
, LoopInfo
&LI
,
2958 DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
2959 // Iterate over the region R and propagate the domain constrains from the
2960 // predecessors to the current node. In contrast to the
2961 // buildDomainsWithBranchConstraints function, this one will pull the domain
2962 // information from the predecessors instead of pushing it to the successors.
2963 // Additionally, we assume the domains to be already present in the domain
2964 // map here. However, we iterate again in reverse post order so we know all
2965 // predecessors have been visited before a block or non-affine subregion is
2968 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
2969 for (auto *RN
: RTraversal
) {
2970 // Recurse for affine subregions but go on for basic blocks and non-affine
2972 if (RN
->isSubRegion()) {
2973 Region
*SubRegion
= RN
->getNodeAs
<Region
>();
2974 if (!isNonAffineSubRegion(SubRegion
)) {
2975 if (!propagateDomainConstraints(SubRegion
, DT
, LI
, InvalidDomainMap
))
2981 BasicBlock
*BB
= getRegionNodeBasicBlock(RN
);
2982 isl::set
&Domain
= DomainMap
[BB
];
2985 // Under the union of all predecessor conditions we can reach this block.
2986 isl::set PredDom
= getPredecessorDomainConstraints(BB
, Domain
, DT
, LI
);
2987 Domain
= Domain
.intersect(PredDom
).coalesce();
2988 Domain
= Domain
.align_params(getParamSpace());
2990 Loop
*BBLoop
= getRegionNodeLoop(RN
, LI
);
2991 if (BBLoop
&& BBLoop
->getHeader() == BB
&& contains(BBLoop
))
2992 if (!addLoopBoundsToHeaderDomain(BBLoop
, LI
, InvalidDomainMap
))
2999 /// Create a map to map from a given iteration to a subsequent iteration.
3001 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3002 /// is incremented by one and all other dimensions are equal, e.g.,
3003 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3005 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3006 static __isl_give isl_map
*
3007 createNextIterationMap(__isl_take isl_space
*SetSpace
, unsigned Dim
) {
3008 auto *MapSpace
= isl_space_map_from_set(SetSpace
);
3009 auto *NextIterationMap
= isl_map_universe(isl_space_copy(MapSpace
));
3010 for (unsigned u
= 0; u
< isl_map_dim(NextIterationMap
, isl_dim_in
); u
++)
3013 isl_map_equate(NextIterationMap
, isl_dim_in
, u
, isl_dim_out
, u
);
3014 auto *C
= isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace
));
3015 C
= isl_constraint_set_constant_si(C
, 1);
3016 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, Dim
, 1);
3017 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, Dim
, -1);
3018 NextIterationMap
= isl_map_add_constraint(NextIterationMap
, C
);
3019 return NextIterationMap
;
3022 bool Scop::addLoopBoundsToHeaderDomain(
3023 Loop
*L
, LoopInfo
&LI
, DenseMap
<BasicBlock
*, isl::set
> &InvalidDomainMap
) {
3024 int LoopDepth
= getRelativeLoopDepth(L
);
3025 assert(LoopDepth
>= 0 && "Loop in region should have at least depth one");
3027 BasicBlock
*HeaderBB
= L
->getHeader();
3028 assert(DomainMap
.count(HeaderBB
));
3029 isl::set
&HeaderBBDom
= DomainMap
[HeaderBB
];
3031 isl::map NextIterationMap
= isl::manage(
3032 createNextIterationMap(HeaderBBDom
.get_space().release(), LoopDepth
));
3034 isl::set UnionBackedgeCondition
= HeaderBBDom
.empty(HeaderBBDom
.get_space());
3036 SmallVector
<BasicBlock
*, 4> LatchBlocks
;
3037 L
->getLoopLatches(LatchBlocks
);
3039 for (BasicBlock
*LatchBB
: LatchBlocks
) {
3040 // If the latch is only reachable via error statements we skip it.
3041 isl::set LatchBBDom
= DomainMap
.lookup(LatchBB
);
3045 isl::set BackedgeCondition
= nullptr;
3047 TerminatorInst
*TI
= LatchBB
->getTerminator();
3048 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
3049 assert(BI
&& "Only branch instructions allowed in loop latches");
3051 if (BI
->isUnconditional())
3052 BackedgeCondition
= LatchBBDom
;
3054 SmallVector
<isl_set
*, 8> ConditionSets
;
3055 int idx
= BI
->getSuccessor(0) != HeaderBB
;
3056 if (!buildConditionSets(*this, LatchBB
, TI
, L
, LatchBBDom
.get(),
3057 InvalidDomainMap
, ConditionSets
))
3060 // Free the non back edge condition set as we do not need it.
3061 isl_set_free(ConditionSets
[1 - idx
]);
3063 BackedgeCondition
= isl::manage(ConditionSets
[idx
]);
3066 int LatchLoopDepth
= getRelativeLoopDepth(LI
.getLoopFor(LatchBB
));
3067 assert(LatchLoopDepth
>= LoopDepth
);
3068 BackedgeCondition
= BackedgeCondition
.project_out(
3069 isl::dim::set
, LoopDepth
+ 1, LatchLoopDepth
- LoopDepth
);
3070 UnionBackedgeCondition
= UnionBackedgeCondition
.unite(BackedgeCondition
);
3073 isl::map ForwardMap
= ForwardMap
.lex_le(HeaderBBDom
.get_space());
3074 for (int i
= 0; i
< LoopDepth
; i
++)
3075 ForwardMap
= ForwardMap
.equate(isl::dim::in
, i
, isl::dim::out
, i
);
3077 isl::set UnionBackedgeConditionComplement
=
3078 UnionBackedgeCondition
.complement();
3079 UnionBackedgeConditionComplement
=
3080 UnionBackedgeConditionComplement
.lower_bound_si(isl::dim::set
, LoopDepth
,
3082 UnionBackedgeConditionComplement
=
3083 UnionBackedgeConditionComplement
.apply(ForwardMap
);
3084 HeaderBBDom
= HeaderBBDom
.subtract(UnionBackedgeConditionComplement
);
3085 HeaderBBDom
= HeaderBBDom
.apply(NextIterationMap
);
3087 auto Parts
= partitionSetParts(HeaderBBDom
.copy(), LoopDepth
);
3088 HeaderBBDom
= isl::manage(Parts
.second
);
3090 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3091 // the bounded assumptions to the context as they are already implied by the
3093 if (Affinator
.hasNSWAddRecForLoop(L
)) {
3094 isl_set_free(Parts
.first
);
3098 isl::set UnboundedCtx
= isl::manage(Parts
.first
).params();
3099 recordAssumption(INFINITELOOP
, UnboundedCtx
,
3100 HeaderBB
->getTerminator()->getDebugLoc(), AS_RESTRICTION
);
3104 MemoryAccess
*Scop::lookupBasePtrAccess(MemoryAccess
*MA
) {
3105 Value
*PointerBase
= MA
->getOriginalBaseAddr();
3107 auto *PointerBaseInst
= dyn_cast
<Instruction
>(PointerBase
);
3108 if (!PointerBaseInst
)
3111 auto *BasePtrStmt
= getStmtFor(PointerBaseInst
);
3115 return BasePtrStmt
->getArrayAccessOrNULLFor(PointerBaseInst
);
3118 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess
*MA
,
3119 isl::union_map Writes
) {
3120 if (auto *BasePtrMA
= lookupBasePtrAccess(MA
)) {
3121 return getNonHoistableCtx(BasePtrMA
, Writes
).is_null();
3124 Value
*BaseAddr
= MA
->getOriginalBaseAddr();
3125 if (auto *BasePtrInst
= dyn_cast
<Instruction
>(BaseAddr
))
3126 if (!isa
<LoadInst
>(BasePtrInst
))
3127 return contains(BasePtrInst
);
3132 bool Scop::buildAliasChecks(AliasAnalysis
&AA
) {
3133 if (!PollyUseRuntimeAliasChecks
)
3136 if (buildAliasGroups(AA
)) {
3137 // Aliasing assumptions do not go through addAssumption but we still want to
3138 // collect statistics so we do it here explicitly.
3139 if (MinMaxAliasGroups
.size())
3140 AssumptionsAliasing
++;
3144 // If a problem occurs while building the alias groups we need to delete
3145 // this SCoP and pretend it wasn't valid in the first place. To this end
3146 // we make the assumed context infeasible.
3147 invalidate(ALIASING
, DebugLoc());
3149 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3150 << " could not be created as the number of parameters involved "
3151 "is too high. The SCoP will be "
3152 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3153 "the maximal number of parameters but be advised that the "
3154 "compile time might increase exponentially.\n\n");
3158 std::tuple
<Scop::AliasGroupVectorTy
, DenseSet
<const ScopArrayInfo
*>>
3159 Scop::buildAliasGroupsForAccesses(AliasAnalysis
&AA
) {
3160 AliasSetTracker
AST(AA
);
3162 DenseMap
<Value
*, MemoryAccess
*> PtrToAcc
;
3163 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3164 for (ScopStmt
&Stmt
: *this) {
3166 isl_set
*StmtDomain
= Stmt
.getDomain().release();
3167 bool StmtDomainEmpty
= isl_set_is_empty(StmtDomain
);
3168 isl_set_free(StmtDomain
);
3170 // Statements with an empty domain will never be executed.
3171 if (StmtDomainEmpty
)
3174 for (MemoryAccess
*MA
: Stmt
) {
3175 if (MA
->isScalarKind())
3178 HasWriteAccess
.insert(MA
->getScopArrayInfo());
3179 MemAccInst
Acc(MA
->getAccessInstruction());
3180 if (MA
->isRead() && isa
<MemTransferInst
>(Acc
))
3181 PtrToAcc
[cast
<MemTransferInst
>(Acc
)->getRawSource()] = MA
;
3183 PtrToAcc
[Acc
.getPointerOperand()] = MA
;
3188 AliasGroupVectorTy AliasGroups
;
3189 for (AliasSet
&AS
: AST
) {
3190 if (AS
.isMustAlias() || AS
.isForwardingAliasSet())
3194 AG
.push_back(PtrToAcc
[PR
.getValue()]);
3197 AliasGroups
.push_back(std::move(AG
));
3200 return std::make_tuple(AliasGroups
, HasWriteAccess
);
3203 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy
&AliasGroups
) {
3204 for (unsigned u
= 0; u
< AliasGroups
.size(); u
++) {
3206 AliasGroupTy
&AG
= AliasGroups
[u
];
3207 AliasGroupTy::iterator AGI
= AG
.begin();
3208 isl_set
*AGDomain
= getAccessDomain(*AGI
);
3209 while (AGI
!= AG
.end()) {
3210 MemoryAccess
*MA
= *AGI
;
3211 isl_set
*MADomain
= getAccessDomain(MA
);
3212 if (isl_set_is_disjoint(AGDomain
, MADomain
)) {
3213 NewAG
.push_back(MA
);
3214 AGI
= AG
.erase(AGI
);
3215 isl_set_free(MADomain
);
3217 AGDomain
= isl_set_union(AGDomain
, MADomain
);
3221 if (NewAG
.size() > 1)
3222 AliasGroups
.push_back(std::move(NewAG
));
3223 isl_set_free(AGDomain
);
3227 bool Scop::buildAliasGroups(AliasAnalysis
&AA
) {
3228 // To create sound alias checks we perform the following steps:
3229 // o) We partition each group into read only and non read only accesses.
3230 // o) For each group with more than one base pointer we then compute minimal
3231 // and maximal accesses to each array of a group in read only and non
3232 // read only partitions separately.
3233 AliasGroupVectorTy AliasGroups
;
3234 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
;
3236 std::tie(AliasGroups
, HasWriteAccess
) = buildAliasGroupsForAccesses(AA
);
3238 splitAliasGroupsByDomain(AliasGroups
);
3240 for (AliasGroupTy
&AG
: AliasGroups
) {
3241 if (!hasFeasibleRuntimeContext())
3245 IslMaxOperationsGuard
MaxOpGuard(getIslCtx().get(), OptComputeOut
);
3246 bool Valid
= buildAliasGroup(AG
, HasWriteAccess
);
3250 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota
) {
3251 invalidate(COMPLEXITY
, DebugLoc());
3259 bool Scop::buildAliasGroup(Scop::AliasGroupTy
&AliasGroup
,
3260 DenseSet
<const ScopArrayInfo
*> HasWriteAccess
) {
3261 AliasGroupTy ReadOnlyAccesses
;
3262 AliasGroupTy ReadWriteAccesses
;
3263 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadWriteArrays
;
3264 SmallPtrSet
<const ScopArrayInfo
*, 4> ReadOnlyArrays
;
3266 if (AliasGroup
.size() < 2)
3269 for (MemoryAccess
*Access
: AliasGroup
) {
3270 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "PossibleAlias",
3271 Access
->getAccessInstruction())
3272 << "Possibly aliasing pointer, use restrict keyword.");
3273 const ScopArrayInfo
*Array
= Access
->getScopArrayInfo();
3274 if (HasWriteAccess
.count(Array
)) {
3275 ReadWriteArrays
.insert(Array
);
3276 ReadWriteAccesses
.push_back(Access
);
3278 ReadOnlyArrays
.insert(Array
);
3279 ReadOnlyAccesses
.push_back(Access
);
3283 // If there are no read-only pointers, and less than two read-write pointers,
3284 // no alias check is needed.
3285 if (ReadOnlyAccesses
.empty() && ReadWriteArrays
.size() <= 1)
3288 // If there is no read-write pointer, no alias check is needed.
3289 if (ReadWriteArrays
.empty())
3292 // For non-affine accesses, no alias check can be generated as we cannot
3293 // compute a sufficiently tight lower and upper bound: bail out.
3294 for (MemoryAccess
*MA
: AliasGroup
) {
3295 if (!MA
->isAffine()) {
3296 invalidate(ALIASING
, MA
->getAccessInstruction()->getDebugLoc(),
3297 MA
->getAccessInstruction()->getParent());
3302 // Ensure that for all memory accesses for which we generate alias checks,
3303 // their base pointers are available.
3304 for (MemoryAccess
*MA
: AliasGroup
) {
3305 if (MemoryAccess
*BasePtrMA
= lookupBasePtrAccess(MA
))
3306 addRequiredInvariantLoad(
3307 cast
<LoadInst
>(BasePtrMA
->getAccessInstruction()));
3310 MinMaxAliasGroups
.emplace_back();
3311 MinMaxVectorPairTy
&pair
= MinMaxAliasGroups
.back();
3312 MinMaxVectorTy
&MinMaxAccessesReadWrite
= pair
.first
;
3313 MinMaxVectorTy
&MinMaxAccessesReadOnly
= pair
.second
;
3318 calculateMinMaxAccess(ReadWriteAccesses
, *this, MinMaxAccessesReadWrite
);
3323 // Bail out if the number of values we need to compare is too large.
3324 // This is important as the number of comparisons grows quadratically with
3325 // the number of values we need to compare.
3326 if (MinMaxAccessesReadWrite
.size() + ReadOnlyArrays
.size() >
3327 RunTimeChecksMaxArraysPerGroup
)
3331 calculateMinMaxAccess(ReadOnlyAccesses
, *this, MinMaxAccessesReadOnly
);
3339 /// Get the smallest loop that contains @p S but is not in @p S.
3340 static Loop
*getLoopSurroundingScop(Scop
&S
, LoopInfo
&LI
) {
3341 // Start with the smallest loop containing the entry and expand that
3342 // loop until it contains all blocks in the region. If there is a loop
3343 // containing all blocks in the region check if it is itself contained
3344 // and if so take the parent loop as it will be the smallest containing
3345 // the region but not contained by it.
3346 Loop
*L
= LI
.getLoopFor(S
.getEntry());
3348 bool AllContained
= true;
3349 for (auto *BB
: S
.blocks())
3350 AllContained
&= L
->contains(BB
);
3353 L
= L
->getParentLoop();
3356 return L
? (S
.contains(L
) ? L
->getParentLoop() : L
) : nullptr;
3359 int Scop::NextScopID
= 0;
3361 std::string
Scop::CurrentFunc
;
3363 int Scop::getNextID(std::string ParentFunc
) {
3364 if (ParentFunc
!= CurrentFunc
) {
3365 CurrentFunc
= ParentFunc
;
3368 return NextScopID
++;
3371 Scop::Scop(Region
&R
, ScalarEvolution
&ScalarEvolution
, LoopInfo
&LI
,
3372 DominatorTree
&DT
, ScopDetection::DetectionContext
&DC
,
3373 OptimizationRemarkEmitter
&ORE
)
3374 : IslCtx(isl_ctx_alloc(), isl_ctx_free
), SE(&ScalarEvolution
), DT(&DT
),
3375 R(R
), name(R
.getNameStr()), HasSingleExitEdge(R
.getExitingBlock()),
3376 DC(DC
), ORE(ORE
), Affinator(this, LI
),
3377 ID(getNextID((*R
.getEntry()->getParent()).getName().str())) {
3378 if (IslOnErrorAbort
)
3379 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT
);
3383 Scop::~Scop() = default;
3385 void Scop::foldSizeConstantsToRight() {
3386 isl_union_set
*Accessed
= isl_union_map_range(getAccesses().release());
3388 for (auto Array
: arrays()) {
3389 if (Array
->getNumberOfDimensions() <= 1)
3392 isl_space
*Space
= Array
->getSpace().release();
3394 Space
= isl_space_align_params(Space
, isl_union_set_get_space(Accessed
));
3396 if (!isl_union_set_contains(Accessed
, Space
)) {
3397 isl_space_free(Space
);
3401 isl_set
*Elements
= isl_union_set_extract_set(Accessed
, Space
);
3403 isl_map
*Transform
=
3404 isl_map_universe(isl_space_map_from_set(Array
->getSpace().release()));
3406 std::vector
<int> Int
;
3408 int Dims
= isl_set_dim(Elements
, isl_dim_set
);
3409 for (int i
= 0; i
< Dims
; i
++) {
3411 isl_set_project_out(isl_set_copy(Elements
), isl_dim_set
, 0, i
);
3412 DimOnly
= isl_set_project_out(DimOnly
, isl_dim_set
, 1, Dims
- i
- 1);
3413 DimOnly
= isl_set_lower_bound_si(DimOnly
, isl_dim_set
, 0, 0);
3415 isl_basic_set
*DimHull
= isl_set_affine_hull(DimOnly
);
3417 if (i
== Dims
- 1) {
3419 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3420 isl_basic_set_free(DimHull
);
3424 if (isl_basic_set_dim(DimHull
, isl_dim_div
) == 1) {
3425 isl_aff
*Diff
= isl_basic_set_get_div(DimHull
, 0);
3426 isl_val
*Val
= isl_aff_get_denominator_val(Diff
);
3431 if (isl_val_is_int(Val
))
3432 ValInt
= isl_val_get_num_si(Val
);
3435 Int
.push_back(ValInt
);
3437 isl_constraint
*C
= isl_constraint_alloc_equality(
3438 isl_local_space_from_space(isl_map_get_space(Transform
)));
3439 C
= isl_constraint_set_coefficient_si(C
, isl_dim_out
, i
, ValInt
);
3440 C
= isl_constraint_set_coefficient_si(C
, isl_dim_in
, i
, -1);
3441 Transform
= isl_map_add_constraint(Transform
, C
);
3442 isl_basic_set_free(DimHull
);
3446 isl_basic_set
*ZeroSet
= isl_basic_set_copy(DimHull
);
3447 ZeroSet
= isl_basic_set_fix_si(ZeroSet
, isl_dim_set
, 0, 0);
3450 if (isl_basic_set_is_equal(ZeroSet
, DimHull
)) {
3454 Int
.push_back(ValInt
);
3455 Transform
= isl_map_equate(Transform
, isl_dim_in
, i
, isl_dim_out
, i
);
3456 isl_basic_set_free(DimHull
);
3457 isl_basic_set_free(ZeroSet
);
3460 isl_set
*MappedElements
= isl_map_domain(isl_map_copy(Transform
));
3462 if (!isl_set_is_subset(Elements
, MappedElements
)) {
3463 isl_set_free(Elements
);
3464 isl_set_free(MappedElements
);
3465 isl_map_free(Transform
);
3469 isl_set_free(MappedElements
);
3471 bool CanFold
= true;
3476 unsigned NumDims
= Array
->getNumberOfDimensions();
3477 for (unsigned i
= 1; i
< NumDims
- 1; i
++)
3478 if (Int
[0] != Int
[i
] && Int
[i
])
3482 isl_set_free(Elements
);
3483 isl_map_free(Transform
);
3487 for (auto &Access
: AccessFunctions
)
3488 if (Access
->getScopArrayInfo() == Array
)
3489 Access
->setAccessRelation(Access
->getAccessRelation().apply_range(
3490 isl::manage(isl_map_copy(Transform
))));
3492 isl_map_free(Transform
);
3494 std::vector
<const SCEV
*> Sizes
;
3495 for (unsigned i
= 0; i
< NumDims
; i
++) {
3496 auto Size
= Array
->getDimensionSize(i
);
3498 if (i
== NumDims
- 1)
3499 Size
= SE
->getMulExpr(Size
, SE
->getConstant(Size
->getType(), Int
[0]));
3500 Sizes
.push_back(Size
);
3503 Array
->updateSizes(Sizes
, false /* CheckConsistency */);
3505 isl_set_free(Elements
);
3507 isl_union_set_free(Accessed
);
3510 void Scop::markFortranArrays() {
3511 for (ScopStmt
&Stmt
: Stmts
) {
3512 for (MemoryAccess
*MemAcc
: Stmt
) {
3513 Value
*FAD
= MemAcc
->getFortranArrayDescriptor();
3517 // TODO: const_cast-ing to edit
3518 ScopArrayInfo
*SAI
=
3519 const_cast<ScopArrayInfo
*>(MemAcc
->getLatestScopArrayInfo());
3520 assert(SAI
&& "memory access into a Fortran array does not "
3521 "have an associated ScopArrayInfo");
3522 SAI
->applyAndSetFAD(FAD
);
3527 void Scop::finalizeAccesses() {
3528 updateAccessDimensionality();
3529 foldSizeConstantsToRight();
3530 foldAccessRelations();
3531 assumeNoOutOfBounds();
3532 markFortranArrays();
3535 void Scop::updateAccessDimensionality() {
3536 // Check all array accesses for each base pointer and find a (virtual) element
3537 // size for the base pointer that divides all access functions.
3538 for (ScopStmt
&Stmt
: *this)
3539 for (MemoryAccess
*Access
: Stmt
) {
3540 if (!Access
->isArrayKind())
3542 ScopArrayInfo
*Array
=
3543 const_cast<ScopArrayInfo
*>(Access
->getScopArrayInfo());
3545 if (Array
->getNumberOfDimensions() != 1)
3547 unsigned DivisibleSize
= Array
->getElemSizeInBytes();
3548 const SCEV
*Subscript
= Access
->getSubscript(0);
3549 while (!isDivisible(Subscript
, DivisibleSize
, *SE
))
3551 auto *Ty
= IntegerType::get(SE
->getContext(), DivisibleSize
* 8);
3552 Array
->updateElementType(Ty
);
3555 for (auto &Stmt
: *this)
3556 for (auto &Access
: Stmt
)
3557 Access
->updateDimensionality();
3560 void Scop::foldAccessRelations() {
3561 for (auto &Stmt
: *this)
3562 for (auto &Access
: Stmt
)
3563 Access
->foldAccessRelation();
3566 void Scop::assumeNoOutOfBounds() {
3567 for (auto &Stmt
: *this)
3568 for (auto &Access
: Stmt
)
3569 Access
->assumeNoOutOfBound();
3572 void Scop::removeFromStmtMap(ScopStmt
&Stmt
) {
3573 for (Instruction
*Inst
: Stmt
.getInstructions())
3574 InstStmtMap
.erase(Inst
);
3576 if (Stmt
.isRegionStmt()) {
3577 for (BasicBlock
*BB
: Stmt
.getRegion()->blocks()) {
3579 // Skip entry basic block, as its instructions are already deleted as
3580 // part of the statement's instruction list.
3581 if (BB
== Stmt
.getEntryBlock())
3583 for (Instruction
&Inst
: *BB
)
3584 InstStmtMap
.erase(&Inst
);
3587 auto StmtMapIt
= StmtMap
.find(Stmt
.getBasicBlock());
3588 if (StmtMapIt
!= StmtMap
.end())
3589 StmtMapIt
->second
.erase(std::remove(StmtMapIt
->second
.begin(),
3590 StmtMapIt
->second
.end(), &Stmt
),
3591 StmtMapIt
->second
.end());
3592 for (Instruction
*Inst
: Stmt
.getInstructions())
3593 InstStmtMap
.erase(Inst
);
3597 void Scop::removeStmts(std::function
<bool(ScopStmt
&)> ShouldDelete
) {
3598 for (auto StmtIt
= Stmts
.begin(), StmtEnd
= Stmts
.end(); StmtIt
!= StmtEnd
;) {
3599 if (!ShouldDelete(*StmtIt
)) {
3604 removeFromStmtMap(*StmtIt
);
3605 StmtIt
= Stmts
.erase(StmtIt
);
3609 void Scop::removeStmtNotInDomainMap() {
3610 auto ShouldDelete
= [this](ScopStmt
&Stmt
) -> bool {
3611 return !this->DomainMap
.lookup(Stmt
.getEntryBlock());
3613 removeStmts(ShouldDelete
);
3616 void Scop::simplifySCoP(bool AfterHoisting
) {
3617 auto ShouldDelete
= [AfterHoisting
](ScopStmt
&Stmt
) -> bool {
3618 bool RemoveStmt
= Stmt
.isEmpty();
3620 // Remove read only statements only after invariant load hoisting.
3621 if (!RemoveStmt
&& AfterHoisting
) {
3622 bool OnlyRead
= true;
3623 for (MemoryAccess
*MA
: Stmt
) {
3631 RemoveStmt
= OnlyRead
;
3636 removeStmts(ShouldDelete
);
3639 InvariantEquivClassTy
*Scop::lookupInvariantEquivClass(Value
*Val
) {
3640 LoadInst
*LInst
= dyn_cast
<LoadInst
>(Val
);
3644 if (Value
*Rep
= InvEquivClassVMap
.lookup(LInst
))
3645 LInst
= cast
<LoadInst
>(Rep
);
3647 Type
*Ty
= LInst
->getType();
3648 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3649 for (auto &IAClass
: InvariantEquivClasses
) {
3650 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3653 auto &MAs
= IAClass
.InvariantAccesses
;
3654 for (auto *MA
: MAs
)
3655 if (MA
->getAccessInstruction() == Val
)
3662 bool isAParameter(llvm::Value
*maybeParam
, const Function
&F
) {
3663 for (const llvm::Argument
&Arg
: F
.args())
3664 if (&Arg
== maybeParam
)
3670 bool Scop::canAlwaysBeHoisted(MemoryAccess
*MA
, bool StmtInvalidCtxIsEmpty
,
3671 bool MAInvalidCtxIsEmpty
,
3672 bool NonHoistableCtxIsEmpty
) {
3673 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3674 const DataLayout
&DL
= LInst
->getParent()->getModule()->getDataLayout();
3675 if (PollyAllowDereferenceOfAllFunctionParams
&&
3676 isAParameter(LInst
->getPointerOperand(), getFunction()))
3679 // TODO: We can provide more information for better but more expensive
3681 if (!isDereferenceableAndAlignedPointer(LInst
->getPointerOperand(),
3682 LInst
->getAlignment(), DL
))
3685 // If the location might be overwritten we do not hoist it unconditionally.
3687 // TODO: This is probably too conservative.
3688 if (!NonHoistableCtxIsEmpty
)
3691 // If a dereferenceable load is in a statement that is modeled precisely we
3693 if (StmtInvalidCtxIsEmpty
&& MAInvalidCtxIsEmpty
)
3696 // Even if the statement is not modeled precisely we can hoist the load if it
3697 // does not involve any parameters that might have been specialized by the
3698 // statement domain.
3699 for (unsigned u
= 0, e
= MA
->getNumSubscripts(); u
< e
; u
++)
3700 if (!isa
<SCEVConstant
>(MA
->getSubscript(u
)))
3705 void Scop::addInvariantLoads(ScopStmt
&Stmt
, InvariantAccessesTy
&InvMAs
) {
3709 isl::set StmtInvalidCtx
= Stmt
.getInvalidContext();
3710 bool StmtInvalidCtxIsEmpty
= StmtInvalidCtx
.is_empty();
3712 // Get the context under which the statement is executed but remove the error
3713 // context under which this statement is reached.
3714 isl::set DomainCtx
= Stmt
.getDomain().params();
3715 DomainCtx
= DomainCtx
.subtract(StmtInvalidCtx
);
3717 if (isl_set_n_basic_set(DomainCtx
.get()) >= MaxDisjunctsInDomain
) {
3718 auto *AccInst
= InvMAs
.front().MA
->getAccessInstruction();
3719 invalidate(COMPLEXITY
, AccInst
->getDebugLoc(), AccInst
->getParent());
3723 // Project out all parameters that relate to loads in the statement. Otherwise
3724 // we could have cyclic dependences on the constraints under which the
3725 // hoisted loads are executed and we could not determine an order in which to
3726 // pre-load them. This happens because not only lower bounds are part of the
3727 // domain but also upper bounds.
3728 for (auto &InvMA
: InvMAs
) {
3729 auto *MA
= InvMA
.MA
;
3730 Instruction
*AccInst
= MA
->getAccessInstruction();
3731 if (SE
->isSCEVable(AccInst
->getType())) {
3732 SetVector
<Value
*> Values
;
3733 for (const SCEV
*Parameter
: Parameters
) {
3735 findValues(Parameter
, *SE
, Values
);
3736 if (!Values
.count(AccInst
))
3739 if (isl::id ParamId
= getIdForParam(Parameter
)) {
3740 int Dim
= DomainCtx
.find_dim_by_id(isl::dim::param
, ParamId
);
3742 DomainCtx
= DomainCtx
.eliminate(isl::dim::param
, Dim
, 1);
3748 for (auto &InvMA
: InvMAs
) {
3749 auto *MA
= InvMA
.MA
;
3750 isl::set NHCtx
= InvMA
.NonHoistableCtx
;
3752 // Check for another invariant access that accesses the same location as
3753 // MA and if found consolidate them. Otherwise create a new equivalence
3754 // class at the end of InvariantEquivClasses.
3755 LoadInst
*LInst
= cast
<LoadInst
>(MA
->getAccessInstruction());
3756 Type
*Ty
= LInst
->getType();
3757 const SCEV
*PointerSCEV
= SE
->getSCEV(LInst
->getPointerOperand());
3759 isl::set MAInvalidCtx
= MA
->getInvalidContext();
3760 bool NonHoistableCtxIsEmpty
= NHCtx
.is_empty();
3761 bool MAInvalidCtxIsEmpty
= MAInvalidCtx
.is_empty();
3764 // Check if we know that this pointer can be speculatively accessed.
3765 if (canAlwaysBeHoisted(MA
, StmtInvalidCtxIsEmpty
, MAInvalidCtxIsEmpty
,
3766 NonHoistableCtxIsEmpty
)) {
3767 MACtx
= isl::set::universe(DomainCtx
.get_space());
3770 MACtx
= MACtx
.subtract(MAInvalidCtx
.unite(NHCtx
));
3771 MACtx
= MACtx
.gist_params(getContext());
3774 bool Consolidated
= false;
3775 for (auto &IAClass
: InvariantEquivClasses
) {
3776 if (PointerSCEV
!= IAClass
.IdentifyingPointer
|| Ty
!= IAClass
.AccessType
)
3779 // If the pointer and the type is equal check if the access function wrt.
3780 // to the domain is equal too. It can happen that the domain fixes
3781 // parameter values and these can be different for distinct part of the
3782 // SCoP. If this happens we cannot consolidate the loads but need to
3783 // create a new invariant load equivalence class.
3784 auto &MAs
= IAClass
.InvariantAccesses
;
3786 auto *LastMA
= MAs
.front();
3788 isl::set AR
= MA
->getAccessRelation().range();
3789 isl::set LastAR
= LastMA
->getAccessRelation().range();
3790 bool SameAR
= AR
.is_equal(LastAR
);
3796 // Add MA to the list of accesses that are in this class.
3799 Consolidated
= true;
3801 // Unify the execution context of the class and this statement.
3802 isl::set IAClassDomainCtx
= IAClass
.ExecutionContext
;
3803 if (IAClassDomainCtx
)
3804 IAClassDomainCtx
= IAClassDomainCtx
.unite(MACtx
).coalesce();
3806 IAClassDomainCtx
= MACtx
;
3807 IAClass
.ExecutionContext
= IAClassDomainCtx
;
3814 // If we did not consolidate MA, thus did not find an equivalence class
3815 // for it, we create a new one.
3816 InvariantEquivClasses
.emplace_back(
3817 InvariantEquivClassTy
{PointerSCEV
, MemoryAccessList
{MA
}, MACtx
, Ty
});
3821 /// Check if an access range is too complex.
3823 /// An access range is too complex, if it contains either many disjuncts or
3824 /// very complex expressions. As a simple heuristic, we assume if a set to
3825 /// be too complex if the sum of existentially quantified dimensions and
3826 /// set dimensions is larger than a threshold. This reliably detects both
3827 /// sets with many disjuncts as well as sets with many divisions as they
3830 /// @param AccessRange The range to check for complexity.
3832 /// @returns True if the access range is too complex.
3833 static bool isAccessRangeTooComplex(isl::set AccessRange
) {
3834 unsigned NumTotalDims
= 0;
3836 auto CountDimensions
= [&NumTotalDims
](isl::basic_set BSet
) -> isl::stat
{
3837 NumTotalDims
+= BSet
.dim(isl::dim::div
);
3838 NumTotalDims
+= BSet
.dim(isl::dim::set
);
3839 return isl::stat::ok
;
3842 AccessRange
.foreach_basic_set(CountDimensions
);
3844 if (NumTotalDims
> MaxDimensionsInAccessRange
)
3850 isl::set
Scop::getNonHoistableCtx(MemoryAccess
*Access
, isl::union_map Writes
) {
3851 // TODO: Loads that are not loop carried, hence are in a statement with
3852 // zero iterators, are by construction invariant, though we
3853 // currently "hoist" them anyway. This is necessary because we allow
3854 // them to be treated as parameters (e.g., in conditions) and our code
3855 // generation would otherwise use the old value.
3857 auto &Stmt
= *Access
->getStatement();
3858 BasicBlock
*BB
= Stmt
.getEntryBlock();
3860 if (Access
->isScalarKind() || Access
->isWrite() || !Access
->isAffine() ||
3861 Access
->isMemoryIntrinsic())
3864 // Skip accesses that have an invariant base pointer which is defined but
3865 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3866 // returns a pointer that is used as a base address. However, as we want
3867 // to hoist indirect pointers, we allow the base pointer to be defined in
3868 // the region if it is also a memory access. Each ScopArrayInfo object
3869 // that has a base pointer origin has a base pointer that is loaded and
3870 // that it is invariant, thus it will be hoisted too. However, if there is
3871 // no base pointer origin we check that the base pointer is defined
3872 // outside the region.
3873 auto *LI
= cast
<LoadInst
>(Access
->getAccessInstruction());
3874 if (hasNonHoistableBasePtrInScop(Access
, Writes
))
3877 isl::map AccessRelation
= give(Access
->getAccessRelation().release());
3878 assert(!AccessRelation
.is_empty());
3880 if (AccessRelation
.involves_dims(isl::dim::in
, 0, Stmt
.getNumIterators()))
3883 AccessRelation
= AccessRelation
.intersect_domain(Stmt
.getDomain());
3884 isl::set SafeToLoad
;
3886 auto &DL
= getFunction().getParent()->getDataLayout();
3887 if (isSafeToLoadUnconditionally(LI
->getPointerOperand(), LI
->getAlignment(),
3889 SafeToLoad
= isl::set::universe(AccessRelation
.get_space().range());
3890 } else if (BB
!= LI
->getParent()) {
3891 // Skip accesses in non-affine subregions as they might not be executed
3892 // under the same condition as the entry of the non-affine subregion.
3895 SafeToLoad
= AccessRelation
.range();
3898 if (isAccessRangeTooComplex(AccessRelation
.range()))
3901 isl::union_map Written
= Writes
.intersect_range(SafeToLoad
);
3902 isl::set WrittenCtx
= Written
.params();
3903 bool IsWritten
= !WrittenCtx
.is_empty();
3908 WrittenCtx
= WrittenCtx
.remove_divs();
3910 isl_set_n_basic_set(WrittenCtx
.get()) >= MaxDisjunctsInDomain
;
3911 if (TooComplex
|| !isRequiredInvariantLoad(LI
))
3914 addAssumption(INVARIANTLOAD
, WrittenCtx
, LI
->getDebugLoc(), AS_RESTRICTION
,
3919 void Scop::verifyInvariantLoads() {
3920 auto &RIL
= getRequiredInvariantLoads();
3921 for (LoadInst
*LI
: RIL
) {
3922 assert(LI
&& contains(LI
));
3923 // If there exists a statement in the scop which has a memory access for
3924 // @p LI, then mark this scop as infeasible for optimization.
3925 for (ScopStmt
&Stmt
: Stmts
)
3926 if (Stmt
.getArrayAccessOrNULLFor(LI
)) {
3927 invalidate(INVARIANTLOAD
, LI
->getDebugLoc(), LI
->getParent());
3933 void Scop::hoistInvariantLoads() {
3934 if (!PollyInvariantLoadHoisting
)
3937 isl::union_map Writes
= getWrites();
3938 for (ScopStmt
&Stmt
: *this) {
3939 InvariantAccessesTy InvariantAccesses
;
3941 for (MemoryAccess
*Access
: Stmt
)
3942 if (isl::set NHCtx
= getNonHoistableCtx(Access
, Writes
))
3943 InvariantAccesses
.push_back({Access
, NHCtx
});
3945 // Transfer the memory access from the statement to the SCoP.
3946 for (auto InvMA
: InvariantAccesses
)
3947 Stmt
.removeMemoryAccess(InvMA
.MA
);
3948 addInvariantLoads(Stmt
, InvariantAccesses
);
3952 /// Find the canonical scop array info object for a set of invariant load
3953 /// hoisted loads. The canonical array is the one that corresponds to the
3954 /// first load in the list of accesses which is used as base pointer of a
3956 static const ScopArrayInfo
*findCanonicalArray(Scop
*S
,
3957 MemoryAccessList
&Accesses
) {
3958 for (MemoryAccess
*Access
: Accesses
) {
3959 const ScopArrayInfo
*CanonicalArray
= S
->getScopArrayInfoOrNull(
3960 Access
->getAccessInstruction(), MemoryKind::Array
);
3962 return CanonicalArray
;
3967 /// Check if @p Array severs as base array in an invariant load.
3968 static bool isUsedForIndirectHoistedLoad(Scop
*S
, const ScopArrayInfo
*Array
) {
3969 for (InvariantEquivClassTy
&EqClass2
: S
->getInvariantAccesses())
3970 for (MemoryAccess
*Access2
: EqClass2
.InvariantAccesses
)
3971 if (Access2
->getScopArrayInfo() == Array
)
3976 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3977 /// with a reference to @p New.
3978 static void replaceBasePtrArrays(Scop
*S
, const ScopArrayInfo
*Old
,
3979 const ScopArrayInfo
*New
) {
3980 for (ScopStmt
&Stmt
: *S
)
3981 for (MemoryAccess
*Access
: Stmt
) {
3982 if (Access
->getLatestScopArrayInfo() != Old
)
3985 isl::id Id
= New
->getBasePtrId();
3986 isl::map Map
= Access
->getAccessRelation();
3987 Map
= Map
.set_tuple_id(isl::dim::out
, Id
);
3988 Access
->setAccessRelation(Map
);
3992 void Scop::canonicalizeDynamicBasePtrs() {
3993 for (InvariantEquivClassTy
&EqClass
: InvariantEquivClasses
) {
3994 MemoryAccessList
&BasePtrAccesses
= EqClass
.InvariantAccesses
;
3996 const ScopArrayInfo
*CanonicalBasePtrSAI
=
3997 findCanonicalArray(this, BasePtrAccesses
);
3999 if (!CanonicalBasePtrSAI
)
4002 for (MemoryAccess
*BasePtrAccess
: BasePtrAccesses
) {
4003 const ScopArrayInfo
*BasePtrSAI
= getScopArrayInfoOrNull(
4004 BasePtrAccess
->getAccessInstruction(), MemoryKind::Array
);
4005 if (!BasePtrSAI
|| BasePtrSAI
== CanonicalBasePtrSAI
||
4006 !BasePtrSAI
->isCompatibleWith(CanonicalBasePtrSAI
))
4009 // we currently do not canonicalize arrays where some accesses are
4010 // hoisted as invariant loads. If we would, we need to update the access
4011 // function of the invariant loads as well. However, as this is not a
4012 // very common situation, we leave this for now to avoid further
4013 // complexity increases.
4014 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI
))
4017 replaceBasePtrArrays(this, BasePtrSAI
, CanonicalBasePtrSAI
);
4022 ScopArrayInfo
*Scop::getOrCreateScopArrayInfo(Value
*BasePtr
, Type
*ElementType
,
4023 ArrayRef
<const SCEV
*> Sizes
,
4025 const char *BaseName
) {
4026 assert((BasePtr
|| BaseName
) &&
4027 "BasePtr and BaseName can not be nullptr at the same time.");
4028 assert(!(BasePtr
&& BaseName
) && "BaseName is redundant.");
4029 auto &SAI
= BasePtr
? ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)]
4030 : ScopArrayNameMap
[BaseName
];
4032 auto &DL
= getFunction().getParent()->getDataLayout();
4033 SAI
.reset(new ScopArrayInfo(BasePtr
, ElementType
, getIslCtx(), Sizes
, Kind
,
4034 DL
, this, BaseName
));
4035 ScopArrayInfoSet
.insert(SAI
.get());
4037 SAI
->updateElementType(ElementType
);
4038 // In case of mismatching array sizes, we bail out by setting the run-time
4039 // context to false.
4040 if (!SAI
->updateSizes(Sizes
))
4041 invalidate(DELINEARIZATION
, DebugLoc());
4046 ScopArrayInfo
*Scop::createScopArrayInfo(Type
*ElementType
,
4047 const std::string
&BaseName
,
4048 const std::vector
<unsigned> &Sizes
) {
4049 auto *DimSizeType
= Type::getInt64Ty(getSE()->getContext());
4050 std::vector
<const SCEV
*> SCEVSizes
;
4052 for (auto size
: Sizes
)
4054 SCEVSizes
.push_back(getSE()->getConstant(DimSizeType
, size
, false));
4056 SCEVSizes
.push_back(nullptr);
4058 auto *SAI
= getOrCreateScopArrayInfo(nullptr, ElementType
, SCEVSizes
,
4059 MemoryKind::Array
, BaseName
.c_str());
4063 const ScopArrayInfo
*Scop::getScopArrayInfoOrNull(Value
*BasePtr
,
4065 auto *SAI
= ScopArrayInfoMap
[std::make_pair(BasePtr
, Kind
)].get();
4069 const ScopArrayInfo
*Scop::getScopArrayInfo(Value
*BasePtr
, MemoryKind Kind
) {
4070 auto *SAI
= getScopArrayInfoOrNull(BasePtr
, Kind
);
4071 assert(SAI
&& "No ScopArrayInfo available for this base pointer");
4075 std::string
Scop::getContextStr() const { return getContext().to_str(); }
4077 std::string
Scop::getAssumedContextStr() const {
4078 assert(AssumedContext
&& "Assumed context not yet built");
4079 return AssumedContext
.to_str();
4082 std::string
Scop::getInvalidContextStr() const {
4083 return InvalidContext
.to_str();
4086 std::string
Scop::getNameStr() const {
4087 std::string ExitName
, EntryName
;
4088 std::tie(EntryName
, ExitName
) = getEntryExitStr();
4089 return EntryName
+ "---" + ExitName
;
4092 std::pair
<std::string
, std::string
> Scop::getEntryExitStr() const {
4093 std::string ExitName
, EntryName
;
4094 raw_string_ostream
ExitStr(ExitName
);
4095 raw_string_ostream
EntryStr(EntryName
);
4097 R
.getEntry()->printAsOperand(EntryStr
, false);
4101 R
.getExit()->printAsOperand(ExitStr
, false);
4104 ExitName
= "FunctionExit";
4106 return std::make_pair(EntryName
, ExitName
);
4109 isl::set
Scop::getContext() const { return Context
; }
4110 isl::space
Scop::getParamSpace() const { return getContext().get_space(); }
4112 isl::space
Scop::getFullParamSpace() const {
4113 std::vector
<isl::id
> FortranIDs
;
4114 FortranIDs
= getFortranArrayIds(arrays());
4116 isl::space Space
= isl::space::params_alloc(
4117 getIslCtx(), ParameterIds
.size() + FortranIDs
.size());
4120 for (const SCEV
*Parameter
: Parameters
) {
4121 isl::id Id
= getIdForParam(Parameter
);
4122 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4125 for (isl::id Id
: FortranIDs
)
4126 Space
= Space
.set_dim_id(isl::dim::param
, PDim
++, Id
);
4131 isl::set
Scop::getAssumedContext() const {
4132 assert(AssumedContext
&& "Assumed context not yet built");
4133 return AssumedContext
;
4136 bool Scop::isProfitable(bool ScalarsAreUnprofitable
) const {
4137 if (PollyProcessUnprofitable
)
4143 unsigned OptimizableStmtsOrLoops
= 0;
4144 for (auto &Stmt
: *this) {
4145 if (Stmt
.getNumIterators() == 0)
4148 bool ContainsArrayAccs
= false;
4149 bool ContainsScalarAccs
= false;
4150 for (auto *MA
: Stmt
) {
4153 ContainsArrayAccs
|= MA
->isLatestArrayKind();
4154 ContainsScalarAccs
|= MA
->isLatestScalarKind();
4157 if (!ScalarsAreUnprofitable
|| (ContainsArrayAccs
&& !ContainsScalarAccs
))
4158 OptimizableStmtsOrLoops
+= Stmt
.getNumIterators();
4161 return OptimizableStmtsOrLoops
> 1;
4164 bool Scop::hasFeasibleRuntimeContext() const {
4165 auto PositiveContext
= getAssumedContext();
4166 auto NegativeContext
= getInvalidContext();
4167 PositiveContext
= addNonEmptyDomainConstraints(PositiveContext
);
4168 // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
4169 if (!PositiveContext
)
4172 bool IsFeasible
= !(PositiveContext
.is_empty() ||
4173 PositiveContext
.is_subset(NegativeContext
));
4177 auto DomainContext
= getDomains().params();
4178 IsFeasible
= !DomainContext
.is_subset(NegativeContext
);
4179 IsFeasible
&= !Context
.is_subset(NegativeContext
);
4184 static std::string
toString(AssumptionKind Kind
) {
4187 return "No-aliasing";
4191 return "No-overflows";
4193 return "Signed-unsigned";
4195 return "Low complexity";
4197 return "Profitable";
4201 return "Finite loop";
4203 return "Invariant load";
4204 case DELINEARIZATION
:
4205 return "Delinearization";
4207 llvm_unreachable("Unknown AssumptionKind!");
4210 bool Scop::isEffectiveAssumption(isl::set Set
, AssumptionSign Sign
) {
4211 if (Sign
== AS_ASSUMPTION
) {
4212 if (Context
.is_subset(Set
))
4215 if (AssumedContext
.is_subset(Set
))
4218 if (Set
.is_disjoint(Context
))
4221 if (Set
.is_subset(InvalidContext
))
4227 bool Scop::trackAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4228 AssumptionSign Sign
, BasicBlock
*BB
) {
4229 if (PollyRemarksMinimal
&& !isEffectiveAssumption(Set
, Sign
))
4232 // Do never emit trivial assumptions as they only clutter the output.
4233 if (!PollyRemarksMinimal
) {
4235 if (Sign
== AS_ASSUMPTION
)
4236 Univ
= isl::set::universe(Set
.get_space());
4238 bool IsTrivial
= (Sign
== AS_RESTRICTION
&& Set
.is_empty()) ||
4239 (Sign
== AS_ASSUMPTION
&& Univ
.is_equal(Set
));
4247 AssumptionsAliasing
++;
4250 AssumptionsInbounds
++;
4253 AssumptionsWrapping
++;
4256 AssumptionsUnsigned
++;
4259 AssumptionsComplexity
++;
4262 AssumptionsUnprofitable
++;
4265 AssumptionsErrorBlock
++;
4268 AssumptionsInfiniteLoop
++;
4271 AssumptionsInvariantLoad
++;
4273 case DELINEARIZATION
:
4274 AssumptionsDelinearization
++;
4278 auto Suffix
= Sign
== AS_ASSUMPTION
? " assumption:\t" : " restriction:\t";
4279 std::string Msg
= toString(Kind
) + Suffix
+ Set
.to_str();
4281 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
, BB
)
4284 ORE
.emit(OptimizationRemarkAnalysis(DEBUG_TYPE
, "AssumpRestrict", Loc
,
4290 void Scop::addAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4291 AssumptionSign Sign
, BasicBlock
*BB
) {
4292 // Simplify the assumptions/restrictions first.
4293 Set
= Set
.gist_params(getContext());
4295 if (!trackAssumption(Kind
, Set
, Loc
, Sign
, BB
))
4298 if (Sign
== AS_ASSUMPTION
)
4299 AssumedContext
= AssumedContext
.intersect(Set
).coalesce();
4301 InvalidContext
= InvalidContext
.unite(Set
).coalesce();
4304 void Scop::recordAssumption(AssumptionKind Kind
, isl::set Set
, DebugLoc Loc
,
4305 AssumptionSign Sign
, BasicBlock
*BB
) {
4306 assert((Set
.is_params() || BB
) &&
4307 "Assumptions without a basic block must be parameter sets");
4308 RecordedAssumptions
.push_back({Kind
, Sign
, Set
, Loc
, BB
});
4311 void Scop::addRecordedAssumptions() {
4312 while (!RecordedAssumptions
.empty()) {
4313 Assumption AS
= RecordedAssumptions
.pop_back_val();
4316 addAssumption(AS
.Kind
, AS
.Set
, AS
.Loc
, AS
.Sign
, nullptr /* BasicBlock */);
4320 // If the domain was deleted the assumptions are void.
4321 isl_set
*Dom
= getDomainConditions(AS
.BB
).release();
4325 // If a basic block was given use its domain to simplify the assumption.
4326 // In case of restrictions we know they only have to hold on the domain,
4327 // thus we can intersect them with the domain of the block. However, for
4328 // assumptions the domain has to imply them, thus:
4330 // Dom => S <==> A v B <==> A - B
4332 // To avoid the complement we will register A - B as a restriction not an
4334 isl_set
*S
= AS
.Set
.copy();
4335 if (AS
.Sign
== AS_RESTRICTION
)
4336 S
= isl_set_params(isl_set_intersect(S
, Dom
));
4337 else /* (AS.Sign == AS_ASSUMPTION) */
4338 S
= isl_set_params(isl_set_subtract(Dom
, S
));
4340 addAssumption(AS
.Kind
, isl::manage(S
), AS
.Loc
, AS_RESTRICTION
, AS
.BB
);
4344 void Scop::invalidate(AssumptionKind Kind
, DebugLoc Loc
, BasicBlock
*BB
) {
4345 DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind
<< "\n");
4346 addAssumption(Kind
, isl::set::empty(getParamSpace()), Loc
, AS_ASSUMPTION
, BB
);
4349 isl::set
Scop::getInvalidContext() const { return InvalidContext
; }
4351 void Scop::printContext(raw_ostream
&OS
) const {
4353 OS
.indent(4) << Context
<< "\n";
4355 OS
.indent(4) << "Assumed Context:\n";
4356 OS
.indent(4) << AssumedContext
<< "\n";
4358 OS
.indent(4) << "Invalid Context:\n";
4359 OS
.indent(4) << InvalidContext
<< "\n";
4362 for (const SCEV
*Parameter
: Parameters
)
4363 OS
.indent(4) << "p" << Dim
++ << ": " << *Parameter
<< "\n";
4366 void Scop::printAliasAssumptions(raw_ostream
&OS
) const {
4368 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4369 if (Pair
.second
.size() == 0)
4372 noOfGroups
+= Pair
.second
.size();
4375 OS
.indent(4) << "Alias Groups (" << noOfGroups
<< "):\n";
4376 if (MinMaxAliasGroups
.empty()) {
4377 OS
.indent(8) << "n/a\n";
4381 for (const MinMaxVectorPairTy
&Pair
: MinMaxAliasGroups
) {
4383 // If the group has no read only accesses print the write accesses.
4384 if (Pair
.second
.empty()) {
4385 OS
.indent(8) << "[[";
4386 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4387 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4393 for (const MinMaxAccessTy
&MMAReadOnly
: Pair
.second
) {
4394 OS
.indent(8) << "[[";
4395 OS
<< " <" << MMAReadOnly
.first
<< ", " << MMAReadOnly
.second
<< ">";
4396 for (const MinMaxAccessTy
&MMANonReadOnly
: Pair
.first
) {
4397 OS
<< " <" << MMANonReadOnly
.first
<< ", " << MMANonReadOnly
.second
4405 void Scop::printStatements(raw_ostream
&OS
, bool PrintInstructions
) const {
4406 OS
<< "Statements {\n";
4408 for (const ScopStmt
&Stmt
: *this) {
4410 Stmt
.print(OS
, PrintInstructions
);
4413 OS
.indent(4) << "}\n";
4416 void Scop::printArrayInfo(raw_ostream
&OS
) const {
4419 for (auto &Array
: arrays())
4422 OS
.indent(4) << "}\n";
4424 OS
.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4426 for (auto &Array
: arrays())
4427 Array
->print(OS
, /* SizeAsPwAff */ true);
4429 OS
.indent(4) << "}\n";
4432 void Scop::print(raw_ostream
&OS
, bool PrintInstructions
) const {
4433 OS
.indent(4) << "Function: " << getFunction().getName() << "\n";
4434 OS
.indent(4) << "Region: " << getNameStr() << "\n";
4435 OS
.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4436 OS
.indent(4) << "Invariant Accesses: {\n";
4437 for (const auto &IAClass
: InvariantEquivClasses
) {
4438 const auto &MAs
= IAClass
.InvariantAccesses
;
4440 OS
.indent(12) << "Class Pointer: " << *IAClass
.IdentifyingPointer
<< "\n";
4442 MAs
.front()->print(OS
);
4443 OS
.indent(12) << "Execution Context: " << IAClass
.ExecutionContext
4447 OS
.indent(4) << "}\n";
4448 printContext(OS
.indent(4));
4449 printArrayInfo(OS
.indent(4));
4450 printAliasAssumptions(OS
);
4451 printStatements(OS
.indent(4), PrintInstructions
);
4454 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4455 LLVM_DUMP_METHOD
void Scop::dump() const { print(dbgs(), true); }
4458 isl::ctx
Scop::getIslCtx() const { return IslCtx
.get(); }
4460 __isl_give PWACtx
Scop::getPwAff(const SCEV
*E
, BasicBlock
*BB
,
4462 // First try to use the SCEVAffinator to generate a piecewise defined
4463 // affine function from @p E in the context of @p BB. If that tasks becomes to
4464 // complex the affinator might return a nullptr. In such a case we invalidate
4465 // the SCoP and return a dummy value. This way we do not need to add error
4466 // handling code to all users of this function.
4467 auto PWAC
= Affinator
.getPwAff(E
, BB
);
4469 // TODO: We could use a heuristic and either use:
4470 // SCEVAffinator::takeNonNegativeAssumption
4472 // SCEVAffinator::interpretAsUnsigned
4473 // to deal with unsigned or "NonNegative" SCEVs.
4475 Affinator
.takeNonNegativeAssumption(PWAC
);
4479 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
4480 invalidate(COMPLEXITY
, DL
, BB
);
4481 return Affinator
.getPwAff(SE
->getZero(E
->getType()), BB
);
4484 isl::union_set
Scop::getDomains() const {
4485 isl_space
*EmptySpace
= isl_space_params_alloc(getIslCtx().get(), 0);
4486 isl_union_set
*Domain
= isl_union_set_empty(EmptySpace
);
4488 for (const ScopStmt
&Stmt
: *this)
4489 Domain
= isl_union_set_add_set(Domain
, Stmt
.getDomain().release());
4491 return isl::manage(Domain
);
4494 isl::pw_aff
Scop::getPwAffOnly(const SCEV
*E
, BasicBlock
*BB
) {
4495 PWACtx PWAC
= getPwAff(E
, BB
);
4500 Scop::getAccessesOfType(std::function
<bool(MemoryAccess
&)> Predicate
) {
4501 isl::union_map Accesses
= isl::union_map::empty(getParamSpace());
4503 for (ScopStmt
&Stmt
: *this) {
4504 for (MemoryAccess
*MA
: Stmt
) {
4505 if (!Predicate(*MA
))
4508 isl::set Domain
= Stmt
.getDomain();
4509 isl::map AccessDomain
= MA
->getAccessRelation();
4510 AccessDomain
= AccessDomain
.intersect_domain(Domain
);
4511 Accesses
= Accesses
.add_map(AccessDomain
);
4515 return Accesses
.coalesce();
4518 isl::union_map
Scop::getMustWrites() {
4519 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMustWrite(); });
4522 isl::union_map
Scop::getMayWrites() {
4523 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isMayWrite(); });
4526 isl::union_map
Scop::getWrites() {
4527 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isWrite(); });
4530 isl::union_map
Scop::getReads() {
4531 return getAccessesOfType([](MemoryAccess
&MA
) { return MA
.isRead(); });
4534 isl::union_map
Scop::getAccesses() {
4535 return getAccessesOfType([](MemoryAccess
&MA
) { return true; });
4538 isl::union_map
Scop::getAccesses(ScopArrayInfo
*Array
) {
4539 return getAccessesOfType(
4540 [Array
](MemoryAccess
&MA
) { return MA
.getScopArrayInfo() == Array
; });
4543 // Check whether @p Node is an extension node.
4545 // @return true if @p Node is an extension node.
4546 isl_bool
isNotExtNode(__isl_keep isl_schedule_node
*Node
, void *User
) {
4547 if (isl_schedule_node_get_type(Node
) == isl_schedule_node_extension
)
4548 return isl_bool_error
;
4550 return isl_bool_true
;
4553 bool Scop::containsExtensionNode(isl::schedule Schedule
) {
4554 return isl_schedule_foreach_schedule_node_top_down(
4555 Schedule
.keep(), isNotExtNode
, nullptr) == isl_stat_error
;
4558 isl::union_map
Scop::getSchedule() const {
4559 auto Tree
= getScheduleTree();
4560 if (containsExtensionNode(Tree
))
4563 return Tree
.get_map();
4566 isl::schedule
Scop::getScheduleTree() const {
4567 return Schedule
.intersect_domain(getDomains());
4570 void Scop::setSchedule(isl::union_map NewSchedule
) {
4571 auto S
= isl::schedule::from_domain(getDomains());
4572 Schedule
= S
.insert_partial_schedule(
4573 isl::multi_union_pw_aff::from_union_map(NewSchedule
));
4576 void Scop::setScheduleTree(isl::schedule NewSchedule
) {
4577 Schedule
= NewSchedule
;
4580 bool Scop::restrictDomains(isl::union_set Domain
) {
4581 bool Changed
= false;
4582 for (ScopStmt
&Stmt
: *this) {
4583 isl::union_set StmtDomain
= isl::union_set(Stmt
.getDomain());
4584 isl::union_set NewStmtDomain
= StmtDomain
.intersect(Domain
);
4586 if (StmtDomain
.is_subset(NewStmtDomain
))
4591 NewStmtDomain
= NewStmtDomain
.coalesce();
4593 if (NewStmtDomain
.is_empty())
4594 Stmt
.restrictDomain(isl::set::empty(Stmt
.getDomainSpace()));
4596 Stmt
.restrictDomain(isl::set(NewStmtDomain
));
4601 ScalarEvolution
*Scop::getSE() const { return SE
; }
4603 // Create an isl_multi_union_aff that defines an identity mapping from the
4604 // elements of USet to their N-th dimension.
4608 // Domain: { A[i,j]; B[i,j,k] }
4611 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4613 // @param USet A union set describing the elements for which to generate a
4615 // @param N The dimension to map to.
4616 // @returns A mapping from USet to its N-th dimension.
4617 static isl::multi_union_pw_aff
mapToDimension(isl::union_set USet
, int N
) {
4620 assert(!USet
.is_empty());
4622 auto Result
= isl::union_pw_multi_aff::empty(USet
.get_space());
4624 auto Lambda
= [&Result
, N
](isl::set S
) -> isl::stat
{
4625 int Dim
= S
.dim(isl::dim::set
);
4626 auto PMA
= isl::pw_multi_aff::project_out_map(S
.get_space(), isl::dim::set
,
4629 PMA
= PMA
.drop_dims(isl::dim::out
, 0, N
- 1);
4631 Result
= Result
.add_pw_multi_aff(PMA
);
4632 return isl::stat::ok
;
4635 isl::stat Res
= USet
.foreach_set(Lambda
);
4638 assert(Res
== isl::stat::ok
);
4640 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result
));
4643 void Scop::addScopStmt(BasicBlock
*BB
, Loop
*SurroundingLoop
,
4644 std::vector
<Instruction
*> Instructions
, int Count
) {
4645 assert(BB
&& "Unexpected nullptr!");
4646 Stmts
.emplace_back(*this, *BB
, SurroundingLoop
, Instructions
, Count
);
4647 auto *Stmt
= &Stmts
.back();
4648 StmtMap
[BB
].push_back(Stmt
);
4649 for (Instruction
*Inst
: Instructions
) {
4650 assert(!InstStmtMap
.count(Inst
) &&
4651 "Unexpected statement corresponding to the instruction.");
4652 InstStmtMap
[Inst
] = Stmt
;
4656 void Scop::addScopStmt(Region
*R
, Loop
*SurroundingLoop
,
4657 std::vector
<Instruction
*> Instructions
) {
4658 assert(R
&& "Unexpected nullptr!");
4659 Stmts
.emplace_back(*this, *R
, SurroundingLoop
, Instructions
);
4660 auto *Stmt
= &Stmts
.back();
4662 for (Instruction
*Inst
: Instructions
) {
4663 assert(!InstStmtMap
.count(Inst
) &&
4664 "Unexpected statement corresponding to the instruction.");
4665 InstStmtMap
[Inst
] = Stmt
;
4668 for (BasicBlock
*BB
: R
->blocks()) {
4669 StmtMap
[BB
].push_back(Stmt
);
4670 if (BB
== R
->getEntry())
4672 for (Instruction
&Inst
: *BB
) {
4673 assert(!InstStmtMap
.count(&Inst
) &&
4674 "Unexpected statement corresponding to the instruction.");
4675 InstStmtMap
[&Inst
] = Stmt
;
4680 ScopStmt
*Scop::addScopStmt(isl::map SourceRel
, isl::map TargetRel
,
4683 isl::set SourceDomain
= SourceRel
.domain();
4684 isl::set TargetDomain
= TargetRel
.domain();
4685 assert(Domain
.is_subset(TargetDomain
) &&
4686 "Target access not defined for complete statement domain");
4687 assert(Domain
.is_subset(SourceDomain
) &&
4688 "Source access not defined for complete statement domain");
4690 Stmts
.emplace_back(*this, SourceRel
, TargetRel
, Domain
);
4692 return &(Stmts
.back());
4695 void Scop::buildSchedule(LoopInfo
&LI
) {
4696 Loop
*L
= getLoopSurroundingScop(*this, LI
);
4697 LoopStackTy
LoopStack({LoopStackElementTy(L
, nullptr, 0)});
4698 buildSchedule(getRegion().getNode(), LoopStack
, LI
);
4699 assert(LoopStack
.size() == 1 && LoopStack
.back().L
== L
);
4700 Schedule
= LoopStack
[0].Schedule
;
4703 /// To generate a schedule for the elements in a Region we traverse the Region
4704 /// in reverse-post-order and add the contained RegionNodes in traversal order
4705 /// to the schedule of the loop that is currently at the top of the LoopStack.
4706 /// For loop-free codes, this results in a correct sequential ordering.
4717 /// Including loops requires additional processing. Whenever a loop header is
4718 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4719 /// from an empty schedule, we first process all RegionNodes that are within
4720 /// this loop and complete the sequential schedule at this loop-level before
4721 /// processing about any other nodes. To implement this
4722 /// loop-nodes-first-processing, the reverse post-order traversal is
4723 /// insufficient. Hence, we additionally check if the traversal yields
4724 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4725 /// These region-nodes are then queue and only traverse after the all nodes
4726 /// within the current loop have been processed.
4727 void Scop::buildSchedule(Region
*R
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4728 Loop
*OuterScopLoop
= getLoopSurroundingScop(*this, LI
);
4730 ReversePostOrderTraversal
<Region
*> RTraversal(R
);
4731 std::deque
<RegionNode
*> WorkList(RTraversal
.begin(), RTraversal
.end());
4732 std::deque
<RegionNode
*> DelayList
;
4733 bool LastRNWaiting
= false;
4735 // Iterate over the region @p R in reverse post-order but queue
4736 // sub-regions/blocks iff they are not part of the last encountered but not
4737 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4738 // that we queued the last sub-region/block from the reverse post-order
4739 // iterator. If it is set we have to explore the next sub-region/block from
4740 // the iterator (if any) to guarantee progress. If it is not set we first try
4741 // the next queued sub-region/blocks.
4742 while (!WorkList
.empty() || !DelayList
.empty()) {
4745 if ((LastRNWaiting
&& !WorkList
.empty()) || DelayList
.empty()) {
4746 RN
= WorkList
.front();
4747 WorkList
.pop_front();
4748 LastRNWaiting
= false;
4750 RN
= DelayList
.front();
4751 DelayList
.pop_front();
4754 Loop
*L
= getRegionNodeLoop(RN
, LI
);
4758 Loop
*LastLoop
= LoopStack
.back().L
;
4759 if (LastLoop
!= L
) {
4760 if (LastLoop
&& !LastLoop
->contains(L
)) {
4761 LastRNWaiting
= true;
4762 DelayList
.push_back(RN
);
4765 LoopStack
.push_back({L
, nullptr, 0});
4767 buildSchedule(RN
, LoopStack
, LI
);
4771 void Scop::buildSchedule(RegionNode
*RN
, LoopStackTy
&LoopStack
, LoopInfo
&LI
) {
4772 if (RN
->isSubRegion()) {
4773 auto *LocalRegion
= RN
->getNodeAs
<Region
>();
4774 if (!isNonAffineSubRegion(LocalRegion
)) {
4775 buildSchedule(LocalRegion
, LoopStack
, LI
);
4780 assert(LoopStack
.rbegin() != LoopStack
.rend());
4781 auto LoopData
= LoopStack
.rbegin();
4782 LoopData
->NumBlocksProcessed
+= getNumBlocksInRegionNode(RN
);
4784 for (auto *Stmt
: getStmtListFor(RN
)) {
4785 isl::union_set UDomain
{Stmt
->getDomain()};
4786 auto StmtSchedule
= isl::schedule::from_domain(UDomain
);
4787 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, StmtSchedule
);
4790 // Check if we just processed the last node in this loop. If we did, finalize
4793 // - adding new schedule dimensions
4794 // - folding the resulting schedule into the parent loop schedule
4795 // - dropping the loop schedule from the LoopStack.
4797 // Then continue to check surrounding loops, which might also have been
4798 // completed by this node.
4799 size_t Dimension
= LoopStack
.size();
4800 while (LoopData
->L
&&
4801 LoopData
->NumBlocksProcessed
== getNumBlocksInLoop(LoopData
->L
)) {
4802 isl::schedule Schedule
= LoopData
->Schedule
;
4803 auto NumBlocksProcessed
= LoopData
->NumBlocksProcessed
;
4805 assert(std::next(LoopData
) != LoopStack
.rend());
4810 isl::union_set Domain
= Schedule
.get_domain();
4811 isl::multi_union_pw_aff MUPA
= mapToDimension(Domain
, Dimension
);
4812 Schedule
= Schedule
.insert_partial_schedule(MUPA
);
4813 LoopData
->Schedule
= combineInSequence(LoopData
->Schedule
, Schedule
);
4816 LoopData
->NumBlocksProcessed
+= NumBlocksProcessed
;
4818 // Now pop all loops processed up there from the LoopStack
4819 LoopStack
.erase(LoopStack
.begin() + Dimension
, LoopStack
.end());
4822 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(BasicBlock
*BB
) const {
4823 auto StmtMapIt
= StmtMap
.find(BB
);
4824 if (StmtMapIt
== StmtMap
.end())
4826 return StmtMapIt
->second
;
4829 ScopStmt
*Scop::getLastStmtFor(BasicBlock
*BB
) const {
4830 ArrayRef
<ScopStmt
*> StmtList
= getStmtListFor(BB
);
4831 if (!StmtList
.empty())
4832 return StmtList
.back();
4836 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(RegionNode
*RN
) const {
4837 if (RN
->isSubRegion())
4838 return getStmtListFor(RN
->getNodeAs
<Region
>());
4839 return getStmtListFor(RN
->getNodeAs
<BasicBlock
>());
4842 ArrayRef
<ScopStmt
*> Scop::getStmtListFor(Region
*R
) const {
4843 return getStmtListFor(R
->getEntry());
4846 int Scop::getRelativeLoopDepth(const Loop
*L
) const {
4847 if (!L
|| !R
.contains(L
))
4849 // outermostLoopInRegion always returns nullptr for top level regions
4850 if (R
.isTopLevelRegion()) {
4851 // LoopInfo's depths start at 1, we start at 0
4852 return L
->getLoopDepth() - 1;
4854 Loop
*OuterLoop
= R
.outermostLoopInRegion(const_cast<Loop
*>(L
));
4856 return L
->getLoopDepth() - OuterLoop
->getLoopDepth();
4860 ScopArrayInfo
*Scop::getArrayInfoByName(const std::string BaseName
) {
4861 for (auto &SAI
: arrays()) {
4862 if (SAI
->getName() == BaseName
)
4868 void Scop::addAccessData(MemoryAccess
*Access
) {
4869 const ScopArrayInfo
*SAI
= Access
->getOriginalScopArrayInfo();
4870 assert(SAI
&& "can only use after access relations have been constructed");
4872 if (Access
->isOriginalValueKind() && Access
->isRead())
4873 ValueUseAccs
[SAI
].push_back(Access
);
4874 else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite())
4875 PHIIncomingAccs
[SAI
].push_back(Access
);
4878 void Scop::removeAccessData(MemoryAccess
*Access
) {
4879 if (Access
->isOriginalValueKind() && Access
->isWrite()) {
4880 ValueDefAccs
.erase(Access
->getAccessValue());
4881 } else if (Access
->isOriginalValueKind() && Access
->isRead()) {
4882 auto &Uses
= ValueUseAccs
[Access
->getScopArrayInfo()];
4883 std::remove(Uses
.begin(), Uses
.end(), Access
);
4884 } else if (Access
->isOriginalPHIKind() && Access
->isRead()) {
4885 PHINode
*PHI
= cast
<PHINode
>(Access
->getAccessInstruction());
4886 PHIReadAccs
.erase(PHI
);
4887 } else if (Access
->isOriginalAnyPHIKind() && Access
->isWrite()) {
4888 auto &Incomings
= PHIIncomingAccs
[Access
->getScopArrayInfo()];
4889 std::remove(Incomings
.begin(), Incomings
.end(), Access
);
4893 MemoryAccess
*Scop::getValueDef(const ScopArrayInfo
*SAI
) const {
4894 assert(SAI
->isValueKind());
4896 Instruction
*Val
= dyn_cast
<Instruction
>(SAI
->getBasePtr());
4900 return ValueDefAccs
.lookup(Val
);
4903 ArrayRef
<MemoryAccess
*> Scop::getValueUses(const ScopArrayInfo
*SAI
) const {
4904 assert(SAI
->isValueKind());
4905 auto It
= ValueUseAccs
.find(SAI
);
4906 if (It
== ValueUseAccs
.end())
4911 MemoryAccess
*Scop::getPHIRead(const ScopArrayInfo
*SAI
) const {
4912 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4914 if (SAI
->isExitPHIKind())
4917 PHINode
*PHI
= cast
<PHINode
>(SAI
->getBasePtr());
4918 return PHIReadAccs
.lookup(PHI
);
4921 ArrayRef
<MemoryAccess
*> Scop::getPHIIncomings(const ScopArrayInfo
*SAI
) const {
4922 assert(SAI
->isPHIKind() || SAI
->isExitPHIKind());
4923 auto It
= PHIIncomingAccs
.find(SAI
);
4924 if (It
== PHIIncomingAccs
.end())
4929 bool Scop::isEscaping(Instruction
*Inst
) {
4930 assert(contains(Inst
) && "The concept of escaping makes only sense for "
4931 "values defined inside the SCoP");
4933 for (Use
&Use
: Inst
->uses()) {
4934 BasicBlock
*UserBB
= getUseBlock(Use
);
4935 if (!contains(UserBB
))
4938 // When the SCoP region exit needs to be simplified, PHIs in the region exit
4939 // move to a new basic block such that its incoming blocks are not in the
4941 if (hasSingleExitEdge() && isa
<PHINode
>(Use
.getUser()) &&
4942 isExit(cast
<PHINode
>(Use
.getUser())->getParent()))
4948 Scop::ScopStatistics
Scop::getStatistics() const {
4949 ScopStatistics Result
;
4950 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
4951 auto LoopStat
= ScopDetection::countBeneficialLoops(&R
, *SE
, *getLI(), 0);
4953 int NumTotalLoops
= LoopStat
.NumLoops
;
4954 Result
.NumBoxedLoops
= getBoxedLoops().size();
4955 Result
.NumAffineLoops
= NumTotalLoops
- Result
.NumBoxedLoops
;
4957 for (const ScopStmt
&Stmt
: *this) {
4958 isl::set Domain
= Stmt
.getDomain().intersect_params(getContext());
4959 bool IsInLoop
= Stmt
.getNumIterators() >= 1;
4960 for (MemoryAccess
*MA
: Stmt
) {
4964 if (MA
->isLatestValueKind()) {
4965 Result
.NumValueWrites
+= 1;
4967 Result
.NumValueWritesInLoops
+= 1;
4970 if (MA
->isLatestAnyPHIKind()) {
4971 Result
.NumPHIWrites
+= 1;
4973 Result
.NumPHIWritesInLoops
+= 1;
4977 MA
->getAccessRelation().intersect_domain(Domain
).range();
4978 if (AccSet
.is_singleton()) {
4979 Result
.NumSingletonWrites
+= 1;
4981 Result
.NumSingletonWritesInLoops
+= 1;
4989 raw_ostream
&polly::operator<<(raw_ostream
&OS
, const Scop
&scop
) {
4990 scop
.print(OS
, PollyPrintInstructions
);
4994 //===----------------------------------------------------------------------===//
4995 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
4996 AU
.addRequired
<LoopInfoWrapperPass
>();
4997 AU
.addRequired
<RegionInfoPass
>();
4998 AU
.addRequired
<DominatorTreeWrapperPass
>();
4999 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5000 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5001 AU
.addRequired
<AAResultsWrapperPass
>();
5002 AU
.addRequired
<AssumptionCacheTracker
>();
5003 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5004 AU
.setPreservesAll();
5007 void updateLoopCountStatistic(ScopDetection::LoopStats Stats
,
5008 Scop::ScopStatistics ScopStats
) {
5009 assert(Stats
.NumLoops
== ScopStats
.NumAffineLoops
+ ScopStats
.NumBoxedLoops
);
5012 NumLoopsInScop
+= Stats
.NumLoops
;
5014 std::max(MaxNumLoopsInScop
.getValue(), (unsigned)Stats
.NumLoops
);
5016 if (Stats
.MaxDepth
== 1)
5018 else if (Stats
.MaxDepth
== 2)
5020 else if (Stats
.MaxDepth
== 3)
5021 NumScopsDepthThree
++;
5022 else if (Stats
.MaxDepth
== 4)
5023 NumScopsDepthFour
++;
5024 else if (Stats
.MaxDepth
== 5)
5025 NumScopsDepthFive
++;
5027 NumScopsDepthLarger
++;
5029 NumAffineLoops
+= ScopStats
.NumAffineLoops
;
5030 NumBoxedLoops
+= ScopStats
.NumBoxedLoops
;
5032 NumValueWrites
+= ScopStats
.NumValueWrites
;
5033 NumValueWritesInLoops
+= ScopStats
.NumValueWritesInLoops
;
5034 NumPHIWrites
+= ScopStats
.NumPHIWrites
;
5035 NumPHIWritesInLoops
+= ScopStats
.NumPHIWritesInLoops
;
5036 NumSingletonWrites
+= ScopStats
.NumSingletonWrites
;
5037 NumSingletonWritesInLoops
+= ScopStats
.NumSingletonWritesInLoops
;
5040 bool ScopInfoRegionPass::runOnRegion(Region
*R
, RGPassManager
&RGM
) {
5041 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5043 if (!SD
.isMaxRegionInScop(*R
))
5046 Function
*F
= R
->getEntry()->getParent();
5047 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5048 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5049 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5050 auto const &DL
= F
->getParent()->getDataLayout();
5051 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5052 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(*F
);
5053 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5055 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5056 S
= SB
.getScop(); // take ownership of scop object
5058 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5060 ScopDetection::LoopStats Stats
=
5061 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5062 updateLoopCountStatistic(Stats
, S
->getStatistics());
5069 void ScopInfoRegionPass::print(raw_ostream
&OS
, const Module
*) const {
5071 S
->print(OS
, PollyPrintInstructions
);
5073 OS
<< "Invalid Scop!\n";
5076 char ScopInfoRegionPass::ID
= 0;
5078 Pass
*polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
5080 INITIALIZE_PASS_BEGIN(ScopInfoRegionPass
, "polly-scops",
5081 "Polly - Create polyhedral description of Scops", false,
5083 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5084 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5085 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5086 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5087 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5088 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5089 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5090 INITIALIZE_PASS_END(ScopInfoRegionPass
, "polly-scops",
5091 "Polly - Create polyhedral description of Scops", false,
5094 //===----------------------------------------------------------------------===//
5095 ScopInfo::ScopInfo(const DataLayout
&DL
, ScopDetection
&SD
, ScalarEvolution
&SE
,
5096 LoopInfo
&LI
, AliasAnalysis
&AA
, DominatorTree
&DT
,
5097 AssumptionCache
&AC
, OptimizationRemarkEmitter
&ORE
)
5098 : DL(DL
), SD(SD
), SE(SE
), LI(LI
), AA(AA
), DT(DT
), AC(AC
), ORE(ORE
) {
5102 void ScopInfo::recompute() {
5103 RegionToScopMap
.clear();
5104 /// Create polyhedral description of scops for all the valid regions of a
5106 for (auto &It
: SD
) {
5107 Region
*R
= const_cast<Region
*>(It
);
5108 if (!SD
.isMaxRegionInScop(*R
))
5111 ScopBuilder
SB(R
, AC
, AA
, DL
, DT
, LI
, SD
, SE
, ORE
);
5112 std::unique_ptr
<Scop
> S
= SB
.getScop();
5115 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5116 ScopDetection::LoopStats Stats
=
5117 ScopDetection::countBeneficialLoops(&S
->getRegion(), SE
, LI
, 0);
5118 updateLoopCountStatistic(Stats
, S
->getStatistics());
5120 bool Inserted
= RegionToScopMap
.insert({R
, std::move(S
)}).second
;
5121 assert(Inserted
&& "Building Scop for the same region twice!");
5126 bool ScopInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
5127 FunctionAnalysisManager::Invalidator
&Inv
) {
5128 // Check whether the analysis, all analyses on functions have been preserved
5129 // or anything we're holding references to is being invalidated
5130 auto PAC
= PA
.getChecker
<ScopInfoAnalysis
>();
5131 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>()) ||
5132 Inv
.invalidate
<ScopAnalysis
>(F
, PA
) ||
5133 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
5134 Inv
.invalidate
<LoopAnalysis
>(F
, PA
) ||
5135 Inv
.invalidate
<AAManager
>(F
, PA
) ||
5136 Inv
.invalidate
<DominatorTreeAnalysis
>(F
, PA
) ||
5137 Inv
.invalidate
<AssumptionAnalysis
>(F
, PA
);
5140 AnalysisKey
ScopInfoAnalysis::Key
;
5142 ScopInfoAnalysis::Result
ScopInfoAnalysis::run(Function
&F
,
5143 FunctionAnalysisManager
&FAM
) {
5144 auto &SD
= FAM
.getResult
<ScopAnalysis
>(F
);
5145 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
5146 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
5147 auto &AA
= FAM
.getResult
<AAManager
>(F
);
5148 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
5149 auto &AC
= FAM
.getResult
<AssumptionAnalysis
>(F
);
5150 auto &DL
= F
.getParent()->getDataLayout();
5151 auto &ORE
= FAM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
5152 return {DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
};
5155 PreservedAnalyses
ScopInfoPrinterPass::run(Function
&F
,
5156 FunctionAnalysisManager
&FAM
) {
5157 auto &SI
= FAM
.getResult
<ScopInfoAnalysis
>(F
);
5158 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5159 // order here to keep the output persistent
5160 for (auto &It
: reverse(SI
)) {
5162 It
.second
->print(Stream
, PollyPrintInstructions
);
5164 Stream
<< "Invalid Scop!\n";
5166 return PreservedAnalyses::all();
5169 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
5170 AU
.addRequired
<LoopInfoWrapperPass
>();
5171 AU
.addRequired
<RegionInfoPass
>();
5172 AU
.addRequired
<DominatorTreeWrapperPass
>();
5173 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
5174 AU
.addRequiredTransitive
<ScopDetectionWrapperPass
>();
5175 AU
.addRequired
<AAResultsWrapperPass
>();
5176 AU
.addRequired
<AssumptionCacheTracker
>();
5177 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
5178 AU
.setPreservesAll();
5181 bool ScopInfoWrapperPass::runOnFunction(Function
&F
) {
5182 auto &SD
= getAnalysis
<ScopDetectionWrapperPass
>().getSD();
5183 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
5184 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
5185 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
5186 auto const &DL
= F
.getParent()->getDataLayout();
5187 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
5188 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
5189 auto &ORE
= getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
5191 Result
.reset(new ScopInfo
{DL
, SD
, SE
, LI
, AA
, DT
, AC
, ORE
});
5195 void ScopInfoWrapperPass::print(raw_ostream
&OS
, const Module
*) const {
5196 for (auto &It
: *Result
) {
5198 It
.second
->print(OS
, PollyPrintInstructions
);
5200 OS
<< "Invalid Scop!\n";
5204 char ScopInfoWrapperPass::ID
= 0;
5206 Pass
*polly::createScopInfoWrapperPassPass() {
5207 return new ScopInfoWrapperPass();
5210 INITIALIZE_PASS_BEGIN(
5211 ScopInfoWrapperPass
, "polly-function-scops",
5212 "Polly - Create polyhedral description of all Scops of a function", false,
5214 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
);
5215 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
);
5216 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
);
5217 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass
);
5218 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
);
5219 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass
);
5220 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
);
5221 INITIALIZE_PASS_END(
5222 ScopInfoWrapperPass
, "polly-function-scops",
5223 "Polly - Create polyhedral description of all Scops of a function", false,