[ScopInfo] Introduce list of statements in Scop::StmtMap. NFC.
//===--------- ScopInfo.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which include, e.g., CLooG, Pluto, LooPo, and Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Debug.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <sstream>
#include <string>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant loads assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
// The maximal number of basic sets we allow to be created during domain
// construction. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjuncts in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjuncts when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;
static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));
static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do not gist-simplify sets accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
bool polly::UseInstructionNames;
static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyPrintInstructions(
    "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
    cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//
// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static __isl_give isl_schedule *
combineInSequence(__isl_take isl_schedule *Prev,
                  __isl_take isl_schedule *Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return isl_schedule_sequence(Prev, Succ);
}
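/// Restrict the value of dimension @p dim of @p S to the signed range
/// @p Range. For sign-wrapped ranges the hole between the wrapped bounds is
/// additionally excluded, as long as the result stays below the disjunct
/// limit of the context.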
static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                    int dim, isl::dim type) {
  isl::val V;
  isl::ctx Ctx = S.get_ctx();

  // The upper and lower bound for a parameter value is derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
  S = S.lower_bound_val(type, dim, V);
  V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
  S = S.upper_bound_val(type, dim, V);

  if (Range.isFullSet())
    return S;

  if (isl_set_n_basic_set(S.get()) > MaxDisjunctsInContext)
    return S;

  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
  if (Range.isSignWrappedSet()) {
    V = valFromAPInt(Ctx.get(), Range.getLower(), true);
    isl::set SLB = S.lower_bound_val(type, dim, V);

    V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
    V = V.sub_ui(1);
    isl::set SUB = S.upper_bound_val(type, dim, V);
    S = SLB.unite(SUB);
  }

  return S;
}
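/// If the base pointer is itself loaded from memory inside the SCoP, return
/// the ScopArrayInfo object of the array this load reads from; otherwise
/// return nullptr. This links an indirectly accessed array to its origin.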
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             MemoryKind::Array);
}
ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), IsOnHeap(false), Kind(Kind),
      DL(DL), S(*S), FAD(nullptr) {
  std::string BasePtrName =
      BaseName ? BaseName
               : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
                                      Kind == MemoryKind::PHI ? "__phi" : "",
                                      UseInstructionNames);
  Id = isl_id_alloc(Ctx, BasePtrName.c_str(), this);

  updateSizes(Sizes);

  if (!BasePtr || Kind != MemoryKind::Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}
__isl_give isl_space *ScopArrayInfo::getSpace() const {
  auto *Space =
      isl_space_set_alloc(isl_id_get_ctx(Id), 0, getNumberOfDimensions());
  Space = isl_space_set_tuple_id(Space, isl_dim_set, isl_id_copy(Id));
  return Space;
}
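/// Check whether this array is read-only, i.e. whether the range of the
/// SCoP's write accesses contains no element of this array's space.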
bool ScopArrayInfo::isReadOnly() {
  isl::union_set WriteSet = give(S.getWrites()).range();
  isl::space Space = give(getSpace());
  WriteSet = WriteSet.extract_set(Space);

  return bool(WriteSet.is_empty());
}
bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
  if (Array->getElementType() != getElementType())
    return false;

  if (Array->getNumberOfDimensions() != getNumberOfDimensions())
    return false;

  for (unsigned i = 0; i < getNumberOfDimensions(); i++)
    if (Array->getDimensionSize(i) != getDimensionSize(i))
      return false;

  return true;
}
void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}
/// Make the ScopArrayInfo model a Fortran array.
void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
  assert(FAD && "got invalid Fortran array descriptor");
  if (this->FAD) {
    assert(this->FAD == FAD &&
           "receiving different array descriptors for same array");
    return;
  }

  assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
  assert(!this->FAD);
  this->FAD = FAD;

  isl::space Space(S.getIslCtx(), 1, 0);

  std::string param_name = getName();
  param_name += "_fortranarr_size";
  // TODO: see if we need to add `this` as the id user pointer
  isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name.c_str(), nullptr);

  Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
  isl::pw_aff PwAff =
      isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);

  DimensionSizesPw[0] = PwAff.release();
}
bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
                                bool CheckConsistency) {
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
  int ExtraDimsNew = NewSizes.size() - SharedDims;
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;

  if (CheckConsistency) {
    for (int i = 0; i < SharedDims; i++) {
      auto *NewSize = NewSizes[i + ExtraDimsNew];
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
      if (NewSize && KnownSize && NewSize != KnownSize)
        return false;
    }

    if (DimensionSizes.size() >= NewSizes.size())
      return true;
  }

  DimensionSizes.clear();
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
                        NewSizes.end());
  for (isl_pw_aff *Size : DimensionSizesPw)
    isl_pw_aff_free(Size);
  DimensionSizesPw.clear();
  for (const SCEV *Expr : DimensionSizes) {
    if (!Expr) {
      DimensionSizesPw.push_back(nullptr);
      continue;
    }
    isl_pw_aff *Size = S.getPwAffOnly(Expr);
    DimensionSizesPw.push_back(Size);
  }
  return true;
}
ScopArrayInfo::~ScopArrayInfo() {
  isl_id_free(Id);
  for (isl_pw_aff *Size : DimensionSizesPw)
    isl_pw_aff_free(Size);
}

std::string ScopArrayInfo::getName() const { return isl_id_get_name(Id); }

int ScopArrayInfo::getElemSizeInBytes() const {
  return DL.getTypeAllocSize(ElementType);
}

__isl_give isl_id *ScopArrayInfo::getBasePtrId() const {
  return isl_id_copy(Id);
}

void ScopArrayInfo::dump() const { print(errs()); }
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // If this is a Fortran array, then we can print the outermost dimension
  // as an isl_pw_aff even though there is no SCEV information.
  bool IsOutermostSizeKnown = SizeAsPwAff && FAD;

  if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
      !getDimensionSize(0)) {
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      auto *Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
      isl_pw_aff_free(Size);
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}
const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(__isl_keep isl_pw_multi_aff *PMA) {
  isl_id *Id = isl_pw_multi_aff_get_tuple_id(PMA, isl_dim_out);
  assert(Id && "Output dimension didn't have an ID");
  return getFromId(Id);
}

const ScopArrayInfo *ScopArrayInfo::getFromId(__isl_take isl_id *Id) {
  void *User = isl_id_get_user(Id);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(Id);
  return SAI;
}
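/// Wrap the access subscripts into the constant dimension sizes of the
/// array, carrying overflow into the next outer dimension. For an array
/// with an inner dimension of constant size 4, e.g., a subscript pair
/// (i, j) becomes (i + floor(j / 4), j mod 4).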
void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = give(SAI->getSpace());
  isl::ctx Ctx = ArraySpace.get_ctx();
  unsigned DimsArray = SAI->getNumberOfDimensions();

  isl::multi_aff DivModAff = isl::multi_aff::identity(
      ArraySpace.map_from_domain_and_range(ArraySpace));
  isl::local_space LArraySpace = isl::local_space(ArraySpace);

  // Begin with last dimension, to iteratively carry into higher dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    // This transformation is not applicable to dimensions of size zero.
    if (DimSize->isZero())
      continue;

    isl::val DimSizeVal =
        valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
    isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
    isl::aff PrevVar =
        isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    isl::aff Modulo = Var.mod_val(DimSizeVal);
    Modulo = Modulo.pullback(DivModAff);

    // Compute: floor(index / size)
    isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
    Divide = Divide.floor();
    Divide = Divide.add(PrevVar);
    Divide = Divide.pullback(DivModAff);

    // Apply Modulo and Divide.
    DivModAff = DivModAff.set_aff(i, Modulo);
    DivModAff = DivModAff.set_aff(i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  isl::map Relation = give(AccessRelation);
  Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
  Relation = Relation.detect_equalities();
  AccessRelation = Relation.release();
}
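/// Adjust the access relation to the dimensionality of the underlying array:
/// pad missing leading dimensions with zero, divide single-subscript byte
/// offsets by the element size, and widen accesses whose loaded type is
/// larger than the array's canonical element type to multi-element accesses.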
void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = give(SAI->getSpace());
  isl::space AccessSpace = give(isl_map_get_space(AccessRelation)).range();
  isl::ctx Ctx = ArraySpace.get_ctx();

  auto DimsArray = ArraySpace.dim(isl::dim::set);
  auto DimsAccess = AccessSpace.dim(isl::dim::set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  isl::map Map = isl::map::from_domain_and_range(
      isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = Map.fix_si(isl::dim::out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);

  AccessRelation = isl_map_apply_range(AccessRelation, Map.release());

  // For the non-delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride-one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not divisible
  // by the access's element size, we will have chosen a smaller ArrayElemSize
  // that divides the offsets of all accesses to this base pointer.
  if (DimsAccess == 1) {
    isl::val V = isl::val(Ctx, ArrayElemSize);
    AccessRelation = isl_map_floordiv_val(AccessRelation, V.release());
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimensions' indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; the effect on unit tests is to make
  // index expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // { [i] -> A[o] : 4i <= o <= 4i + 3 }
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    isl::map Map = isl::map::from_domain_and_range(
        isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

    isl::constraint C;
    isl::local_space LS;

    LS = isl::local_space(Map.get_space());
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_constant_val(isl::val(Ctx, Num - 1));
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
    Map = Map.add_constraint(C);

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
    C = C.set_constant_val(isl::val(Ctx, 0));
    Map = Map.add_constraint(C);
    AccessRelation = isl_map_apply_range(AccessRelation, Map.release());
  }
}
const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
  return "";
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
MemoryAccess::~MemoryAccess() {
  isl_id_free(Id);
  isl_set_free(InvalidDomain);
  isl_map_free(AccessRelation);
  isl_map_free(NewAccessRelation);
}

const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
  isl_id *ArrayId = getArrayId();
  void *User = isl_id_get_user(ArrayId);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(ArrayId);
  return SAI;
}

const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
  isl_id *ArrayId = getLatestArrayId();
  void *User = isl_id_get_user(ArrayId);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(ArrayId);
  return SAI;
}

__isl_give isl_id *MemoryAccess::getOriginalArrayId() const {
  return isl_map_get_tuple_id(AccessRelation, isl_dim_out);
}

__isl_give isl_id *MemoryAccess::getLatestArrayId() const {
  if (!hasNewAccessRelation())
    return getOriginalArrayId();
  return isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
}

__isl_give isl_map *MemoryAccess::getAddressFunction() const {
  return isl_map_lexmin(getAccessRelation());
}

__isl_give isl_pw_multi_aff *MemoryAccess::applyScheduleToAccessRelation(
    __isl_take isl_union_map *USchedule) const {
  isl_map *Schedule, *ScheduledAccRel;
  isl_union_set *UDomain;

  UDomain = isl_union_set_from_set(getStatement()->getDomain());
  USchedule = isl_union_map_intersect_domain(USchedule, UDomain);
  Schedule = isl_map_from_union_map(USchedule);
  ScheduledAccRel = isl_map_apply_domain(getAddressFunction(), Schedule);
  return isl_pw_multi_aff_from_map(ScheduledAccRel);
}

__isl_give isl_map *MemoryAccess::getOriginalAccessRelation() const {
  return isl_map_copy(AccessRelation);
}

std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return stringFromIslObj(AccessRelation);
}

__isl_give isl_space *MemoryAccess::getOriginalAccessRelationSpace() const {
  return isl_map_get_space(AccessRelation);
}

__isl_give isl_map *MemoryAccess::getNewAccessRelation() const {
  return isl_map_copy(NewAccessRelation);
}

std::string MemoryAccess::getNewAccessRelationStr() const {
  return stringFromIslObj(NewAccessRelation);
}

std::string MemoryAccess::getAccessRelationStr() const {
  return isl::manage(getAccessRelation()).to_str();
}
__isl_give isl_basic_map *
MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl_space *Space = isl_space_set_alloc(Statement->getIslCtx(), 0, 1);
  Space = isl_space_align_params(Space, Statement->getDomainSpace());

  return isl_basic_map_from_domain_and_range(
      isl_basic_set_universe(Statement->getDomainSpace()),
      isl_basic_set_universe(Space));
}
// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out-of-bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is greater than or equal to zero and strictly
// smaller than the size of the corresponding dimension). The only exception is
// the outermost dimension for which we do not need to assume any upper bound.
// At this point we formalize this assumption to ensure that at code generation
// time the relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out of bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out of bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure such
// statement instances are never executed.
void MemoryAccess::assumeNoOutOfBound() {
  if (PollyIgnoreInbounds)
    return;
  auto *SAI = getScopArrayInfo();
  isl::space Space = give(getOriginalAccessRelationSpace()).range();
  isl::set Outside = isl::set::empty(Space);
  for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
    isl::local_space LS(Space);
    isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
    isl::pw_aff Zero = isl::pw_aff(LS);

    isl::set DimOutside = Var.lt_set(Zero);
    isl::pw_aff SizeE = give(SAI->getDimensionSizePw(i));
    SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
    SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
    DimOutside = DimOutside.unite(SizeE.le_set(Var));

    Outside = Outside.unite(DimOutside);
  }

  Outside = Outside.apply(give(getAccessRelation()).reverse());
  Outside = Outside.intersect(give(Statement->getDomain()));
  Outside = Outside.params();

  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always safe, but may make the resulting run-time check
  // bail out more often than strictly necessary.
  Outside = Outside.remove_divs();
  Outside = Outside.complement();
  const auto &Loc = getAccessInstruction()
                        ? getAccessInstruction()->getDebugLoc()
                        : DebugLoc();
  if (!PollyPreciseInbounds)
    Outside = Outside.gist_params(give(Statement->getDomain()).params());
  Statement->getParent()->recordAssumption(INBOUNDS, Outside.release(), Loc,
                                           AS_ASSUMPTION);
}
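/// Build the access relation for a memory intrinsic (memset/memcpy/memmove):
/// with a start offset S and a length L, the accessed range is
/// { [i] -> MemRef[o] : S <= o < S + L }; if the length is unknown, the
/// access is overapproximated by all offsets from the start onwards.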
void MemoryAccess::buildMemIntrinsicAccessRelation() {
  assert(isMemoryIntrinsic());
  assert(Subscripts.size() == 2 && Sizes.size() == 1);

  isl::pw_aff SubscriptPWA = give(getPwAff(Subscripts[0]));
  isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);

  isl::map LengthMap;
  if (Subscripts[1] == nullptr) {
    LengthMap = isl::map::universe(SubscriptMap.get_space());
  } else {
    isl::pw_aff LengthPWA = give(getPwAff(Subscripts[1]));
    LengthMap = isl::map::from_pw_aff(LengthPWA);
    isl::space RangeSpace = LengthMap.get_space().range();
    LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
  }
  LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
  LengthMap = LengthMap.align_params(SubscriptMap.get_space());
  SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
  LengthMap = LengthMap.sum(SubscriptMap);
  AccessRelation =
      LengthMap.set_tuple_id(isl::dim::in, give(getStatement()->getDomainId()))
          .release();
}
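/// Tighten the access relation of a non-affine access: derive the signed
/// range of the pointer offset via ScalarEvolution, scale it by the element
/// size, and intersect the (overapproximated) access range with it.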
void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
  ScalarEvolution *SE = Statement->getParent()->getSE();

  auto MAI = MemAccInst(getAccessInstruction());
  if (isa<MemIntrinsic>(MAI))
    return;

  Value *Ptr = MAI.getPointerOperand();
  if (!Ptr || !SE->isSCEVable(Ptr->getType()))
    return;

  auto *PtrSCEV = SE->getSCEV(Ptr);
  if (isa<SCEVCouldNotCompute>(PtrSCEV))
    return;

  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
  if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
    PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);

  const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
  if (Range.isFullSet())
    return;

  if (Range.isWrappedSet() || Range.isSignWrappedSet())
    return;

  bool isWrapping = Range.isSignWrappedSet();

  unsigned BW = Range.getBitWidth();
  const auto One = APInt(BW, 1);
  const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
  const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();

  auto Min = LB.sdiv(APInt(BW, ElementSize));
  auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;

  assert(Min.sle(Max) && "Minimum expected to be less or equal than max");

  isl::map Relation = give(AccessRelation);
  isl::set AccessRange = Relation.range();
  AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
                                    isl::dim::set);
  AccessRelation = Relation.intersect_range(AccessRange).release();
}
void MemoryAccess::foldAccessRelation() {
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = give(isl_map_copy(AccessRelation));

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    isl::pw_aff DimSize = give(getPwAff(Sizes[i + 1]));

    isl::space SpaceSize = DimSize.get_space();
    isl::id ParamId =
        give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));

    Space = give(isl_map_copy(AccessRelation)).get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = give(getScopArrayInfo()->getBasePtrId());
  isl::space Space = give(Statement->getDomainSpace());
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation =
      NewAccessRelation.gist_domain(give(Statement->getDomain()));

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      isl_map_n_basic_map(NewAccessRelation.get()) >
          isl_map_n_basic_map(AccessRelation)) {
  } else {
    isl_map_free(AccessRelation);
    AccessRelation = NewAccessRelation.release();
  }
}
/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
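/// Build the access relation { Stmt[i0, ..., in] -> MemRef[o0, ..., om] }
/// for this access. Memory intrinsics and non-affine accesses are handled
/// specially; the latter are overapproximated by an access to the whole
/// array. For affine accesses, one output dimension is created per
/// subscript expression.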
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(!AccessRelation && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  auto *StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl_set_empty(isl_set_get_space(StmtInvalidDomain));
  isl_set_free(StmtInvalidDomain);

  isl_ctx *Ctx = isl_id_get_ctx(Id);
  isl_id *BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation =
        isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference, if an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (!AccessRelation)
      AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement));

    AccessRelation =
        isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
    return;
  }

  isl_space *Space = isl_space_alloc(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl_map_universe(Space);

  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl_pw_aff *Affine = getPwAff(Subscripts[i]);
    isl_map *SubscriptMap = isl_map_from_pw_aff(Affine);
    AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = isl_map_set_tuple_id(
      AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set));
  AccessRelation =
      isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);

  AccessRelation = isl_map_gist_domain(AccessRelation, Statement->getDomain());
  isl_space_free(Space);
}
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), RedType(RT_NONE), Statement(Stmt),
      InvalidDomain(nullptr), BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
}
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType,
                           __isl_take isl_map *AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), RedType(RT_NONE),
      Statement(Stmt), InvalidDomain(nullptr), AccessInstruction(nullptr),
      IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel),
      FAD(nullptr) {
  auto *ArrayInfoId = isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
}
void MemoryAccess::realignParams() {
  auto *Ctx = Statement->getParent()->getContext();
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
  AccessRelation = isl_map_gist_params(AccessRelation, Ctx);
}

const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}

__isl_give isl_id *MemoryAccess::getId() const { return isl_id_copy(Id); }

raw_ostream &polly::operator<<(raw_ostream &OS,
                               MemoryAccess::ReductionType RT) {
  if (RT == MemoryAccess::RT_NONE)
    OS << "NONE";
  else
    OS << MemoryAccess::getReductionOperatorStr(RT);
  return OS;
}

void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}

void MemoryAccess::dump() const { print(errs()); }
__isl_give isl_pw_aff *MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl_set *StmtDom = isl_set_reset_tuple_id(getStatement()->getDomain());
  isl_set *NewInvalidDom = isl_set_intersect(StmtDom, PWAC.second);
  InvalidDomain = isl_set_union(InvalidDomain, NewInvalidDom);
  return PWAC.first;
}
// Create a map in the size of the provided set domain that maps from one
// element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strictly smaller
// than the last dimension of the output.
//
// getEqualAndLarger(set[i0, i1, ..., iX]):
//
// set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//   : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl_map *getEqualAndLarger(__isl_take isl_space *setDomain) {
  isl_space *Space = isl_space_map_from_set(setDomain);
  isl_map *Map = isl_map_universe(Space);
  unsigned lastDimension = isl_map_dim(Map, isl_dim_in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);

  // Set the last dimension of the input to be strictly smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = isl_map_order_lt(Map, isl_dim_in, lastDimension, isl_dim_out,
                         lastDimension);
  return Map;
}
__isl_give isl_set *
MemoryAccess::getStride(__isl_take const isl_map *Schedule) const {
  isl_map *S = const_cast<isl_map *>(Schedule);
  isl_map *AccessRelation = getAccessRelation();
  isl_space *Space = isl_space_range(isl_map_get_space(S));
  isl_map *NextScatt = getEqualAndLarger(Space);

  S = isl_map_reverse(S);
  NextScatt = isl_map_lexmin(NextScatt);

  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(S));
  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(AccessRelation));
  NextScatt = isl_map_apply_domain(NextScatt, S);
  NextScatt = isl_map_apply_domain(NextScatt, AccessRelation);

  isl_set *Deltas = isl_map_deltas(NextScatt);
  return Deltas;
}
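/// Check whether the distance between the array elements accessed at two
/// consecutive schedule points is always zero in all but the innermost
/// dimension and exactly @p StrideWidth in the innermost one.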
bool MemoryAccess::isStrideX(__isl_take const isl_map *Schedule,
                             int StrideWidth) const {
  isl_set *Stride, *StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX = isl_set_universe(isl_set_get_space(Stride));
  for (unsigned i = 0; i < isl_set_dim(StrideX, isl_dim_set) - 1; i++)
    StrideX = isl_set_fix_si(StrideX, isl_dim_set, i, 0);
  StrideX = isl_set_fix_si(StrideX, isl_dim_set,
                           isl_set_dim(StrideX, isl_dim_set) - 1, StrideWidth);
  IsStrideX = isl_set_is_subset(Stride, StrideX);

  isl_set_free(StrideX);
  isl_set_free(Stride);

  return IsStrideX;
}

bool MemoryAccess::isStrideZero(__isl_take const isl_map *Schedule) const {
  return isStrideX(Schedule, 0);
}

bool MemoryAccess::isStrideOne(__isl_take const isl_map *Schedule) const {
  return isStrideX(Schedule, 1);
}

void MemoryAccess::setAccessRelation(__isl_take isl_map *NewAccess) {
  isl_map_free(AccessRelation);
  AccessRelation = NewAccess;
}
void MemoryAccess::setNewAccessRelation(__isl_take isl_map *NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  auto *NewSpace = isl_map_get_space(NewAccess);
  auto *NewDomainSpace = isl_space_domain(isl_space_copy(NewSpace));
  auto *OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(isl_space_has_equal_tuples(OriginalDomainSpace, NewDomainSpace));
  isl_space_free(NewDomainSpace);
  isl_space_free(OriginalDomainSpace);

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    auto *StmtDomain = getStatement()->getDomain();
    StmtDomain = isl_set_intersect_params(
        StmtDomain, getStatement()->getParent()->getContext());
    auto *NewDomain = isl_map_domain(isl_map_copy(NewAccess));
    assert(isl_set_is_subset(StmtDomain, NewDomain) &&
           "Partial READ accesses not supported");
    isl_set_free(NewDomain);
    isl_set_free(StmtDomain);
  }

  auto *NewAccessSpace = isl_space_range(NewSpace);
  assert(isl_space_has_tuple_id(NewAccessSpace, isl_dim_set) &&
         "Must specify the array that is accessed");
  auto *NewArrayId = isl_space_get_tuple_id(NewAccessSpace, isl_dim_set);
  auto *SAI = static_cast<ScopArrayInfo *>(isl_id_get_user(NewArrayId));
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to the number of dimensions of
  // the accessed array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(isl_space_dim(NewAccessSpace, isl_dim_set) == Dims &&
         "Access dims must match array dims");
  isl_space_free(NewAccessSpace);
  isl_id_free(NewArrayId);
#endif

  isl_map_free(NewAccessRelation);
  NewAccess = isl_map_gist_domain(NewAccess, getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}

bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = give(getStatement()->getDomain());
  isl::set AccDom = give(isl_map_domain(getLatestAccessRelation()));

  return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
}
//===----------------------------------------------------------------------===//
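/// Return the schedule of this statement as a schedule map, relating the
/// statement's domain to its scattering. For an empty domain a trivial
/// zero schedule is returned; if the SCoP has no schedule, nullptr is
/// returned.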
__isl_give isl_map *ScopStmt::getSchedule() const {
  isl_set *Domain = getDomain();
  if (isl_set_is_empty(Domain)) {
    isl_set_free(Domain);
    return isl_map_from_aff(
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
  }
  auto *Schedule = getParent()->getSchedule();
  if (!Schedule) {
    isl_set_free(Domain);
    return nullptr;
  }
  Schedule = isl_union_map_intersect_domain(
      Schedule, isl_union_set_from_set(isl_set_copy(Domain)));
  if (isl_union_map_is_empty(Schedule)) {
    isl_set_free(Domain);
    isl_union_map_free(Schedule);
    return isl_map_from_aff(
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
  }
  auto *M = isl_map_from_union_map(Schedule);
  M = isl_map_coalesce(M);
  M = isl_map_gist_domain(M, Domain);
  M = isl_map_coalesce(M);
  return M;
}
void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) {
  assert(isl_set_is_subset(NewDomain, Domain) &&
         "New domain is not a subset of old domain!");
  isl_set_free(Domain);
  Domain = NewDomain;
}
void ScopStmt::buildAccessRelations() {
  Scop &S = *getParent();
  for (MemoryAccess *Access : MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                           ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
  }
}
MemoryAccess *ScopStmt::lookupPHIReadOf(PHINode *PHI) const {
  for (auto *MA : *this) {
    if (!MA->isRead())
      continue;
    if (!MA->isLatestAnyPHIKind())
      continue;

    if (MA->getAccessInstruction() == PHI)
      return MA;
  }
  return nullptr;
}
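/// Add @p Access to this statement's list of memory accesses and index it
/// by kind: array accesses by their instruction, value writes and reads by
/// the accessed value, and PHI writes by their PHI node.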
void ScopStmt::addAccess(MemoryAccess *Access) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    assert(Parent.getStmtFor(AccessVal) == this);
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  }

  MemAccs.push_back(Access);
}
void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  auto *Ctx = Parent.getContext();
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
  Domain = isl_set_gist_params(Domain, Ctx);
}
/// Add @p BSet to the set @p User if @p BSet is bounded.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}

/// Return the bounded parts of @p S.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S w.r.t. dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  for (unsigned u = 0; u < Dim; u++) {
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}
/// Set the dimension Ids from @p From in @p To.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}
/// Create the conditions under which @p L @p Pred @p R is true.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return isl_pw_aff_eq_set(L, R);
  case ICmpInst::ICMP_NE:
    return isl_pw_aff_ne_set(L, R);
  case ICmpInst::ICMP_SLT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_SLE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_SGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_SGE:
    return isl_pw_aff_ge_set(L, R);
  case ICmpInst::ICMP_ULT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_UGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_ULE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_UGE:
    return isl_pw_aff_ge_set(L, R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Helper function that will make sure the dimensions of the result have the
/// same isl_id's as the @p Domain.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R,
                                             __isl_keep isl_set *Domain) {
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
  return setDimensionIds(Domain, ConsequenceCondSet);
}
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(isl::manage(PWAC.second));
  return PWAC.first;
}
/// Build the condition sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = setDimensionIds(
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));

  isl_pw_aff_free(LHS);

  return true;
}
1538 /// Build the conditions sets for the branch condition @p Condition in
1539 /// the @p Domain.
1541 /// This will fill @p ConditionSets with the conditions under which control
1542 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1543 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1544 /// context under which @p Condition is true/false will be returned as the
1545 /// new elements of @p ConditionSets.
1546 static bool
1547 buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
1548 TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
1549 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1550 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1552 isl_set *ConsequenceCondSet = nullptr;
1553 if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
1554 if (CCond->isZero())
1555 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
1556 else
1557 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
1558 } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
1559 auto Opcode = BinOp->getOpcode();
1560 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
1562 bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
1563 InvalidDomainMap, ConditionSets) &&
1564 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
1565 InvalidDomainMap, ConditionSets);
1566 if (!Valid) {
1567 while (!ConditionSets.empty())
1568 isl_set_free(ConditionSets.pop_back_val());
1569 return false;
1572 isl_set_free(ConditionSets.pop_back_val());
1573 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
1574 isl_set_free(ConditionSets.pop_back_val());
1575 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
1577 if (Opcode == Instruction::And)
1578 ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
1579 else
1580 ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
1581 } else {
1582 auto *ICond = dyn_cast<ICmpInst>(Condition);
1583 assert(ICond &&
1584 "Condition of exiting branch was neither constant nor ICmp!");
1586 ScalarEvolution &SE = *S.getSE();
1587 isl_pw_aff *LHS, *RHS;
1588 // For unsigned comparisons we assume the sign bit of neither operand
1589 // to be set. The comparison is then equal to a signed comparison under
1590 // this assumption.
1591 bool NonNeg = ICond->isUnsigned();
1592 LHS = getPwAff(S, BB, InvalidDomainMap,
1593 SE.getSCEVAtScope(ICond->getOperand(0), L), NonNeg);
1594 RHS = getPwAff(S, BB, InvalidDomainMap,
1595 SE.getSCEVAtScope(ICond->getOperand(1), L), NonNeg);
1596 ConsequenceCondSet =
1597 buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
1600 // If no terminator was given we are only looking for parameter constraints
1601 // under which @p Condition is true/false.
1602 if (!TI)
1603 ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
1604 assert(ConsequenceCondSet);
1605 ConsequenceCondSet = isl_set_coalesce(
1606 isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
1608 isl_set *AlternativeCondSet = nullptr;
1609 bool TooComplex =
1610 isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;
1612 if (!TooComplex) {
1613 AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
1614 isl_set_copy(ConsequenceCondSet));
1615 TooComplex =
1616 isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
1619 if (TooComplex) {
1620 S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
1621 TI ? TI->getParent() : nullptr /* BasicBlock */);
1622 isl_set_free(AlternativeCondSet);
1623 isl_set_free(ConsequenceCondSet);
1624 return false;
1627 ConditionSets.push_back(ConsequenceCondSet);
1628 ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
1630 return true;
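// An illustrative sketch (not from the source): for a branch on 'i < N' in
// the domain { Stmt[i] : 0 <= i < 100 }, the consequence set for the true
// successor is { Stmt[i] : 0 <= i < 100 and i < N } and the alternative set
// for the false successor is the domain minus the consequence set, i.e.,
// { Stmt[i] : 0 <= i < 100 and i >= N }.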
1633 /// Build the condition sets for the terminator @p TI in the @p Domain.
1635 /// This will fill @p ConditionSets with the conditions under which control
1636 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1637 /// have as many elements as @p TI has successors.
1638 static bool
1639 buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
1640 __isl_keep isl_set *Domain,
1641 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1642 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1644 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
1645 return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
1646 ConditionSets);
1648 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
1650 if (TI->getNumSuccessors() == 1) {
1651 ConditionSets.push_back(isl_set_copy(Domain));
1652 return true;
1655 Value *Condition = getConditionFromTerminator(TI);
1656 assert(Condition && "No condition for Terminator");
1658 return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
1659 ConditionSets);
1662 void ScopStmt::buildDomain() {
1663 isl_id *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
1665 Domain = getParent()->getDomainConditions(this);
1666 Domain = isl_set_set_tuple_id(Domain, Id);
1669 void ScopStmt::collectSurroundingLoops() {
1670 for (unsigned u = 0, e = isl_set_n_dim(Domain); u < e; u++) {
1671 isl_id *DimId = isl_set_get_dim_id(Domain, isl_dim_set, u);
1672 NestLoops.push_back(static_cast<Loop *>(isl_id_get_user(DimId)));
1673 isl_id_free(DimId);
1677 ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
1678 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(nullptr),
1679 R(&R), Build(nullptr), SurroundingLoop(SurroundingLoop) {
1681 BaseName = getIslCompatibleName(
1682 "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
1685 ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
1686 std::vector<Instruction *> Instructions)
1687 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1688 R(nullptr), Build(nullptr), SurroundingLoop(SurroundingLoop),
1689 Instructions(Instructions) {
1691 BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), "",
1692 UseInstructionNames);
1695 ScopStmt::ScopStmt(Scop &parent, __isl_take isl_map *SourceRel,
1696 __isl_take isl_map *TargetRel, __isl_take isl_set *NewDomain)
1697 : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain), BB(nullptr),
1698 R(nullptr), Build(nullptr) {
1699 BaseName = getIslCompatibleName("CopyStmt_", "",
1700 std::to_string(parent.getCopyStmtsNum()));
1701 auto *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
1702 Domain = isl_set_set_tuple_id(Domain, isl_id_copy(Id));
1703 TargetRel = isl_map_set_tuple_id(TargetRel, isl_dim_in, Id);
1704 auto *Access =
1705 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
1706 parent.addAccessFunction(Access);
1707 addAccess(Access);
1708 SourceRel = isl_map_set_tuple_id(SourceRel, isl_dim_in, isl_id_copy(Id));
1709 Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
1710 parent.addAccessFunction(Access);
1711 addAccess(Access);
1714 void ScopStmt::init(LoopInfo &LI) {
1715 assert(!Domain && "init must be called only once");
1717 buildDomain();
1718 collectSurroundingLoops();
1719 buildAccessRelations();
1721 if (DetectReductions)
1722 checkForReductions();
1725 /// Collect loads which might form a reduction chain with @p StoreMA.
1727 /// Check if the stored value for @p StoreMA is a binary operator with one or
1728 /// two loads as operands. If the binary operator is commutative and associative,
1729 /// used only once (by @p StoreMA) and its load operands are also used only
1730 /// once, we have found a possible reduction chain. It starts at an operand
1731 /// load and includes the binary operator and @p StoreMA.
1733 /// Note: We allow only one use to ensure the load and binary operator cannot
1734 /// escape this block or flow into any store other than @p StoreMA.
1735 void ScopStmt::collectCandiateReductionLoads(
1736 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1737 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1738 if (!Store)
1739 return;
1741 // Skip if there is not one binary operator between the load and the store
1742 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1743 if (!BinOp)
1744 return;
1746 // Skip if the binary operator has multiple uses
1747 if (BinOp->getNumUses() != 1)
1748 return;
1750 // Skip if the opcode of the binary operator is not commutative/associative
1751 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1752 return;
1754 // Skip if the binary operator is outside the current SCoP
1755 if (BinOp->getParent() != Store->getParent())
1756 return;
1758 // Skip if it is a multiplicative reduction and we disabled them
1759 if (DisableMultiplicativeReductions &&
1760 (BinOp->getOpcode() == Instruction::Mul ||
1761 BinOp->getOpcode() == Instruction::FMul))
1762 return;
1764 // Check the binary operator operands for a candidate load
1765 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1766 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1767 if (!PossibleLoad0 && !PossibleLoad1)
1768 return;
1770 // A load is only a candidate if it cannot escape (thus has only this use)
1771 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1772 if (PossibleLoad0->getParent() == Store->getParent())
1773 Loads.push_back(&getArrayAccessFor(PossibleLoad0));
1774 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1775 if (PossibleLoad1->getParent() == Store->getParent())
1776 Loads.push_back(&getArrayAccessFor(PossibleLoad1));
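// The matched pattern, sketched as LLVM-IR for 'sum += A[i]' (hypothetical
// value names; integer add is unconditionally commutative and associative):
//
//   %val  = load i32, i32* %sum.addr   ; candidate load
//   %elem = load i32, i32* %arrayidx   ; candidate load
//   %add  = add i32 %val, %elem        ; binary operator, single use
//   store i32 %add, i32* %sum.addr     ; @p StoreMA
//
// Both loads feed the single-use 'add' exactly once, so both are collected
// as candidate reduction loads for this store.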
1779 /// Check for reductions in this ScopStmt.
1781 /// Iterate over all store memory accesses and check for valid binary reduction-
1782 /// like chains. For all candidates we check if they have the same base address
1783 /// and there are no other accesses which overlap with them. The base address
1784 /// check rules out impossible reduction candidates early. The overlap check,
1785 /// together with the "only one user" check in collectCandiateReductionLoads,
1786 /// guarantees that none of the intermediate results will escape during
1787 /// execution of the loop nest. We basically check here that no other memory
1788 /// access can access the same memory as the potential reduction.
1789 void ScopStmt::checkForReductions() {
1790 SmallVector<MemoryAccess *, 2> Loads;
1791 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1793 // First collect candidate load-store reduction chains by iterating over all
1794 // stores and collecting possible reduction loads.
1795 for (MemoryAccess *StoreMA : MemAccs) {
1796 if (StoreMA->isRead())
1797 continue;
1799 Loads.clear();
1800 collectCandiateReductionLoads(StoreMA, Loads);
1801 for (MemoryAccess *LoadMA : Loads)
1802 Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1805 // Then check each possible candidate pair.
1806 for (const auto &CandidatePair : Candidates) {
1807 bool Valid = true;
1808 isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
1809 isl_map *StoreAccs = CandidatePair.second->getAccessRelation();
1811 // Skip those with obviously unequal base addresses.
1812 if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
1813 isl_map_free(LoadAccs);
1814 isl_map_free(StoreAccs);
1815 continue;
1818 // Then check whether the remaining accesses overlap with other memory accesses.
1819 isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
1820 AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
1821 isl_set *AllAccs = isl_map_range(AllAccsRel);
1823 for (MemoryAccess *MA : MemAccs) {
1824 if (MA == CandidatePair.first || MA == CandidatePair.second)
1825 continue;
1827 isl_map *AccRel =
1828 isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
1829 isl_set *Accs = isl_map_range(AccRel);
1831 if (isl_set_has_equal_space(AllAccs, Accs)) {
1832 isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
1833 Valid = Valid && isl_set_is_empty(OverlapAccs);
1834 isl_set_free(OverlapAccs);
1835 } else {
1836 isl_set_free(Accs);
1840 isl_set_free(AllAccs);
1841 if (!Valid)
1842 continue;
1844 const LoadInst *Load =
1845 dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1846 MemoryAccess::ReductionType RT =
1847 getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1849 // If no overlapping access was found we mark the load and store as
1850 // reduction like.
1851 CandidatePair.first->markAsReductionLike(RT);
1852 CandidatePair.second->markAsReductionLike(RT);
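// An illustrative sketch (not from the source): in 'for (i) sum += A[i];'
// the load and store of 'sum' form a valid reduction, as no other access in
// the statement touches 'sum'. For 'A[i] = A[i] + A[i + 1];' the extra read
// of A[i + 1] overlaps with the accessed range of A, so the overlap check
// fails and the candidate pair is not marked as reduction-like.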
1856 std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }
1858 std::string ScopStmt::getScheduleStr() const {
1859 auto *S = getSchedule();
1860 if (!S)
1861 return "";
1862 auto Str = stringFromIslObj(S);
1863 isl_map_free(S);
1864 return Str;
1867 void ScopStmt::setInvalidDomain(__isl_take isl_set *ID) {
1868 isl_set_free(InvalidDomain);
1869 InvalidDomain = ID;
1872 BasicBlock *ScopStmt::getEntryBlock() const {
1873 if (isBlockStmt())
1874 return getBasicBlock();
1875 return getRegion()->getEntry();
1878 unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }
1880 const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }
1882 Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
1883 return NestLoops[Dimension];
1886 isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
1888 __isl_give isl_set *ScopStmt::getDomain() const { return isl_set_copy(Domain); }
1890 __isl_give isl_space *ScopStmt::getDomainSpace() const {
1891 return isl_set_get_space(Domain);
1894 __isl_give isl_id *ScopStmt::getDomainId() const {
1895 return isl_set_get_tuple_id(Domain);
1898 ScopStmt::~ScopStmt() {
1899 isl_set_free(Domain);
1900 isl_set_free(InvalidDomain);
1903 void ScopStmt::printInstructions(raw_ostream &OS) const {
1904 OS << "Instructions {\n";
1906 for (Instruction *Inst : Instructions)
1907 OS.indent(16) << *Inst << "\n";
1909 OS.indent(16) << "}\n";
1912 void ScopStmt::print(raw_ostream &OS) const {
1913 OS << "\t" << getBaseName() << "\n";
1914 OS.indent(12) << "Domain :=\n";
1916 if (Domain) {
1917 OS.indent(16) << getDomainStr() << ";\n";
1918 } else
1919 OS.indent(16) << "n/a\n";
1921 OS.indent(12) << "Schedule :=\n";
1923 if (Domain) {
1924 OS.indent(16) << getScheduleStr() << ";\n";
1925 } else
1926 OS.indent(16) << "n/a\n";
1928 for (MemoryAccess *Access : MemAccs)
1929 Access->print(OS);
1931 if (PollyPrintInstructions)
1932 printInstructions(OS.indent(12));
1935 void ScopStmt::dump() const { print(dbgs()); }
1937 void ScopStmt::removeAccessData(MemoryAccess *MA) {
1938 if (MA->isRead() && MA->isOriginalValueKind()) {
1939 bool Found = ValueReads.erase(MA->getAccessValue());
1940 (void)Found;
1941 assert(Found && "Expected access data not found");
1943 if (MA->isWrite() && MA->isOriginalValueKind()) {
1944 bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
1945 (void)Found;
1946 assert(Found && "Expected access data not found");
1948 if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
1949 bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
1950 (void)Found;
1951 assert(Found && "Expected access data not found");
1955 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
1956 // Remove the memory accesses from this statement together with all scalar
1957 // accesses that were caused by it. MemoryKind::Value READs have no access
1958 // instruction, hence would not be removed by this function. However, it is
1959 // only used for invariant LoadInst accesses whose arguments are always
1960 // affine and hence synthesizable, so there are no MemoryKind::Value READ
1961 // accesses to be removed.
1962 auto Predicate = [&](MemoryAccess *Acc) {
1963 return Acc->getAccessInstruction() == MA->getAccessInstruction();
1965 for (auto *Acc : MemAccs) {
1966 if (Predicate(Acc))
1967 removeAccessData(Acc);
1969 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
1970 MemAccs.end());
1971 InstructionToAccess.erase(MA->getAccessInstruction());
1974 void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
1975 auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
1976 assert(MAIt != MemAccs.end());
1977 MemAccs.erase(MAIt);
1979 removeAccessData(MA);
1981 auto It = InstructionToAccess.find(MA->getAccessInstruction());
1982 if (It != InstructionToAccess.end()) {
1983 It->second.remove(MA);
1984 if (It->second.empty())
1985 InstructionToAccess.erase(MA->getAccessInstruction());
1989 //===----------------------------------------------------------------------===//
1990 /// Scop class implementation.
1992 void Scop::setContext(__isl_take isl_set *NewContext) {
1993 NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
1994 isl_set_free(Context);
1995 Context = NewContext;
1998 namespace {
1999 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
2000 struct SCEVSensitiveParameterRewriter
2001 : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
2002 ValueToValueMap &VMap;
2004 public:
2005 SCEVSensitiveParameterRewriter(ValueToValueMap &VMap, ScalarEvolution &SE)
2006 : SCEVRewriteVisitor(SE), VMap(VMap) {}
2008 static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
2009 ValueToValueMap &VMap) {
2010 SCEVSensitiveParameterRewriter SSPR(VMap, SE);
2011 return SSPR.visit(E);
2014 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
2015 auto *Start = visit(E->getStart());
2016 auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
2017 visit(E->getStepRecurrence(SE)),
2018 E->getLoop(), SCEV::FlagAnyWrap);
2019 return SE.getAddExpr(Start, AddRec);
2022 const SCEV *visitUnknown(const SCEVUnknown *E) {
2023 if (auto *NewValue = VMap.lookup(E->getValue()))
2024 return SE.getUnknown(NewValue);
2025 return E;
2029 /// Check whether we should remap a SCEV expression.
2030 struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
2031 ValueToValueMap &VMap;
2032 bool FoundInside = false;
2033 Scop *S;
2035 public:
2036 SCEVFindInsideScop(ValueToValueMap &VMap, ScalarEvolution &SE, Scop *S)
2037 : SCEVTraversal(*this), VMap(VMap), S(S) {}
2039 static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
2040 ValueToValueMap &VMap, Scop *S) {
2041 SCEVFindInsideScop SFIS(VMap, SE, S);
2042 SFIS.visitAll(E);
2043 return SFIS.FoundInside;
2046 bool follow(const SCEV *E) {
2047 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
2048 FoundInside |= S->getRegion().contains(AddRec->getLoop());
2049 } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
2050 if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
2051 FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
2053 return !FoundInside;
2055 bool isDone() { return FoundInside; }
2057 } // namespace
2059 const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) {
2060 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
2061 // doesn't like addition between an AddRec and an expression that
2062 // doesn't have a dominance relationship with it.)
2063 if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
2064 return E;
2066 // Rewrite SCEV.
2067 return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
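// An illustrative sketch (not from the source): if two required invariant
// loads '%a = load %p' and '%b = load %p' fell into one equivalence class
// with %a as representative, a SCEV such as '{%b,+,1}<L>' is rewritten in
// terms of %a, with the AddRec rebuilt around the rewritten start value so
// that it remains valid.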
2070 // This table of function names is used to translate parameter names into
2071 // more human-readable names. This makes it easier to interpret Polly
2072 // analysis results.
2073 StringMap<std::string> KnownNames = {
2074 {"_Z13get_global_idj", "global_id"},
2075 {"_Z12get_local_idj", "local_id"},
2076 {"_Z15get_global_sizej", "global_size"},
2077 {"_Z14get_local_sizej", "local_size"},
2078 {"_Z12get_work_dimv", "work_dim"},
2079 {"_Z17get_global_offsetj", "global_offset"},
2080 {"_Z12get_group_idj", "group_id"},
2081 {"_Z14get_num_groupsj", "num_groups"},
2084 static std::string getCallParamName(CallInst *Call) {
2085 std::string Result;
2086 raw_string_ostream OS(Result);
2087 std::string Name = Call->getCalledFunction()->getName();
2089 auto Iterator = KnownNames.find(Name);
2090 if (Iterator != KnownNames.end())
2091 Name = "__" + Iterator->getValue();
2092 OS << Name;
2093 for (auto &Operand : Call->arg_operands()) {
2094 ConstantInt *Op = cast<ConstantInt>(&Operand);
2095 OS << "_" << Op->getValue();
2097 OS.flush();
2098 return Result;
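// For example (a sketch): a parameter defined by the OpenCL call
// 'get_global_id(0)', mangled as '_Z13get_global_idj', is named
// '__global_id_0' here instead of a generic 'p_<n>'.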
2101 void Scop::createParameterId(const SCEV *Parameter) {
2102 assert(Parameters.count(Parameter));
2103 assert(!ParameterIds.count(Parameter));
2105 std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);
2107 if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
2108 Value *Val = ValueParameter->getValue();
2109 CallInst *Call = dyn_cast<CallInst>(Val);
2111 if (Call && isConstCall(Call)) {
2112 ParameterName = getCallParamName(Call);
2113 } else if (UseInstructionNames) {
2114 // If this parameter references a specific Value and this value has a name
2115 // we use this name as it is likely to be unique and more useful than just
2116 // a number.
2117 if (Val->hasName())
2118 ParameterName = Val->getName();
2119 else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
2120 auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
2121 if (LoadOrigin->hasName()) {
2122 ParameterName += "_loaded_from_";
2123 ParameterName +=
2124 LI->getPointerOperand()->stripInBoundsOffsets()->getName();
2129 ParameterName = getIslCompatibleName("", ParameterName, "");
2132 auto *Id = isl_id_alloc(getIslCtx(), ParameterName.c_str(),
2133 const_cast<void *>((const void *)Parameter));
2134 ParameterIds[Parameter] = Id;
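// Naming examples (a sketch with hypothetical value names): with instruction
// names enabled, a parameter for a value named 'n' is simply called 'n'; one
// for an unnamed load from a pointer named 'bound' becomes, e.g.,
// 'p_2_loaded_from_bound'; otherwise the generic 'p_0', 'p_1', ... scheme is
// used.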
2137 void Scop::addParams(const ParameterSetTy &NewParameters) {
2138 for (const SCEV *Parameter : NewParameters) {
2139 // Normalize the SCEV to get the representing element for an invariant load.
2140 Parameter = extractConstantFactor(Parameter, *SE).second;
2141 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2143 if (Parameters.insert(Parameter))
2144 createParameterId(Parameter);
2148 __isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) {
2149 // Normalize the SCEV to get the representing element for an invariant load.
2150 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2151 return isl_id_copy(ParameterIds.lookup(Parameter));
2154 __isl_give isl_set *
2155 Scop::addNonEmptyDomainConstraints(__isl_take isl_set *C) const {
2156 isl_set *DomainContext = isl_union_set_params(getDomains());
2157 return isl_set_intersect_params(C, DomainContext);
2160 bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
2161 return DT.dominates(BB, getEntry());
2164 void Scop::addUserAssumptions(
2165 AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
2166 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2167 for (auto &Assumption : AC.assumptions()) {
2168 auto *CI = dyn_cast_or_null<CallInst>(Assumption);
2169 if (!CI || CI->getNumArgOperands() != 1)
2170 continue;
2172 bool InScop = contains(CI);
2173 if (!InScop && !isDominatedBy(DT, CI->getParent()))
2174 continue;
2176 auto *L = LI.getLoopFor(CI->getParent());
2177 auto *Val = CI->getArgOperand(0);
2178 ParameterSetTy DetectedParams;
2179 if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
2180 ORE.emit(
2181 OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
2182 << "Non-affine user assumption ignored.");
2183 continue;
2186 // Collect all newly introduced parameters.
2187 ParameterSetTy NewParams;
2188 for (auto *Param : DetectedParams) {
2189 Param = extractConstantFactor(Param, *SE).second;
2190 Param = getRepresentingInvariantLoadSCEV(Param);
2191 if (Parameters.count(Param))
2192 continue;
2193 NewParams.insert(Param);
2196 SmallVector<isl_set *, 2> ConditionSets;
2197 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
2198 auto &Stmt = InScop ? *getStmtFor(CI->getParent()) : *Stmts.begin();
2199 auto *Dom = InScop ? getDomainConditions(&Stmt) : isl_set_copy(Context);
2200 bool Valid = buildConditionSets(*this, Stmt.getEntryBlock(), Val, TI, L,
2201 Dom, InvalidDomainMap, ConditionSets);
2202 isl_set_free(Dom);
2204 if (!Valid)
2205 continue;
2207 isl_set *AssumptionCtx = nullptr;
2208 if (InScop) {
2209 AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
2210 isl_set_free(ConditionSets[0]);
2211 } else {
2212 AssumptionCtx = isl_set_complement(ConditionSets[1]);
2213 AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
2216 // Project out newly introduced parameters as they are not otherwise useful.
2217 if (!NewParams.empty()) {
2218 for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
2219 auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
2220 auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
2221 isl_id_free(Id);
2223 if (!NewParams.count(Param))
2224 continue;
2226 AssumptionCtx =
2227 isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
2230 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
2231 << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
2232 Context = isl_set_intersect(Context, AssumptionCtx);
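// An illustrative sketch (not from the source): for
//
//   __builtin_assume(n > 0);
//   for (int i = 0; i < n; i++) ...
//
// the affine claim 'n > 0' arrives here as an llvm.assume call through the
// AssumptionCache and is intersected into the context, so the scop may be
// optimized under the assumption that 'n' is positive.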
2236 void Scop::addUserContext() {
2237 if (UserContextStr.empty())
2238 return;
2240 isl_set *UserContext =
2241 isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
2242 isl_space *Space = getParamSpace();
2243 if (isl_space_dim(Space, isl_dim_param) !=
2244 isl_set_dim(UserContext, isl_dim_param)) {
2245 auto SpaceStr = isl_space_to_str(Space);
2246 errs() << "Error: the context provided in -polly-context does not have the "
2247 << "same number of dimensions as the computed context. Due to this "
2248 << "mismatch, the -polly-context option is ignored. Please provide "
2249 << "the context in the parameter space: " << SpaceStr << ".\n";
2250 free(SpaceStr);
2251 isl_set_free(UserContext);
2252 isl_space_free(Space);
2253 return;
2256 for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
2257 auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
2258 auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);
2260 if (strcmp(NameContext, NameUserContext) != 0) {
2261 auto SpaceStr = isl_space_to_str(Space);
2262 errs() << "Error: the name of dimension " << i
2263 << " provided in -polly-context "
2264 << "is '" << NameUserContext << "', but the name in the computed "
2265 << "context is '" << NameContext
2266 << "'. Due to this name mismatch, "
2267 << "the -polly-context option is ignored. Please provide "
2268 << "the context in the parameter space: " << SpaceStr << ".\n";
2269 free(SpaceStr);
2270 isl_set_free(UserContext);
2271 isl_space_free(Space);
2272 return;
2275 UserContext =
2276 isl_set_set_dim_id(UserContext, isl_dim_param, i,
2277 isl_space_get_dim_id(Space, isl_dim_param, i));
2280 Context = isl_set_intersect(Context, UserContext);
2281 isl_space_free(Space);
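// An illustrative sketch (not from the source): for a scop with the single
// parameter n, passing
//
//   -polly-context='[n] -> { : n >= 16 }'
//
// intersects the computed context with n >= 16. As checked above, the set
// must use the same number and names of parameter dimensions as the
// computed context.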
2284 void Scop::buildInvariantEquivalenceClasses() {
2285 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
2287 const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
2288 for (LoadInst *LInst : RIL) {
2289 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
2291 Type *Ty = LInst->getType();
2292 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
2293 if (ClassRep) {
2294 InvEquivClassVMap[LInst] = ClassRep;
2295 continue;
2298 ClassRep = LInst;
2299 InvariantEquivClasses.emplace_back(
2300 InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
2304 void Scop::buildContext() {
2305 isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
2306 Context = isl_set_universe(isl_space_copy(Space));
2307 InvalidContext = isl_set_empty(isl_space_copy(Space));
2308 AssumedContext = isl_set_universe(Space);
2311 void Scop::addParameterBounds() {
2312 unsigned PDim = 0;
2313 for (auto *Parameter : Parameters) {
2314 ConstantRange SRange = SE->getSignedRange(Parameter);
2315 Context =
2316 addRangeBoundsToSet(give(Context), SRange, PDim++, isl::dim::param)
2317 .release();
2321 // We use the outermost dimension to generate GPU transfers for Fortran arrays
2322 // even when the array bounds are not known statically. To do so, we need the
2323 // outermost dimension information. We add this into the context so that the
2324 // outermost dimension is available during codegen.
2325 // We currently do not care about dimensions other than the outermost
2326 // dimension since they do not affect transfers.
2327 static isl_set *addFortranArrayOutermostDimParams(__isl_give isl_set *Context,
2328 Scop::array_range Arrays) {
2330 std::vector<isl_id *> OutermostSizeIds;
2331 for (auto Array : Arrays) {
2332 // To check if an array is a Fortran array, we check if it has an isl_pw_aff
2333 // for its outermost dimension. Fortran arrays will have this since the
2334 // outermost dimension size can be picked up from their runtime description.
2335 // TODO: actually need to check if it has a FAD, but for now this works.
2336 if (Array->getNumberOfDimensions() > 0) {
2337 isl_pw_aff *PwAff = Array->getDimensionSizePw(0);
2338 if (!PwAff)
2339 continue;
2341 isl_id *Id = isl_pw_aff_get_dim_id(PwAff, isl_dim_param, 0);
2342 isl_pw_aff_free(PwAff);
2343 assert(Id && "Invalid Id for PwAff expression in Fortran array");
2344 OutermostSizeIds.push_back(Id);
2348 const int NumTrueParams = isl_set_dim(Context, isl_dim_param);
2349 Context = isl_set_add_dims(Context, isl_dim_param, OutermostSizeIds.size());
2351 for (size_t i = 0; i < OutermostSizeIds.size(); i++) {
2352 Context = isl_set_set_dim_id(Context, isl_dim_param, NumTrueParams + i,
2353 OutermostSizeIds[i]);
2354 Context =
2355 isl_set_lower_bound_si(Context, isl_dim_param, NumTrueParams + i, 0);
2358 return Context;
2361 void Scop::realignParams() {
2362 if (PollyIgnoreParamBounds)
2363 return;
2365 // Add all parameters into a common model.
2366 isl_space *Space = isl_space_params_alloc(getIslCtx(), ParameterIds.size());
2368 unsigned PDim = 0;
2369 for (const auto *Parameter : Parameters) {
2370 isl_id *id = getIdForParam(Parameter);
2371 Space = isl_space_set_dim_id(Space, isl_dim_param, PDim++, id);
2374 // Align the parameters of all data structures to the model.
2375 Context = isl_set_align_params(Context, Space);
2377 // Add the outermost dimension of the Fortran arrays into the Context.
2378 // See the description of the function for more information.
2379 Context = addFortranArrayOutermostDimParams(Context, arrays());
2381 // As all parameters are known add bounds to them.
2382 addParameterBounds();
2384 for (ScopStmt &Stmt : *this)
2385 Stmt.realignParams();
2386 // Simplify the schedule according to the context too.
2387 Schedule = isl_schedule_gist_domain_params(Schedule, getContext());
2390 static __isl_give isl_set *
2391 simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
2392 const Scop &S) {
2393 // If we have modeled all blocks in the SCoP that have side effects we can
2394 // simplify the context with the constraints that are needed for anything to
2395 // be executed at all. However, if we have error blocks in the SCoP we already
2396 // assumed some parameter combinations cannot occur and removed them from the
2397 // domains, thus we cannot use the remaining domain to simplify the
2398 // assumptions.
2399 if (!S.hasErrorBlock()) {
2400 isl_set *DomainParameters = isl_union_set_params(S.getDomains());
2401 AssumptionContext =
2402 isl_set_gist_params(AssumptionContext, DomainParameters);
2405 AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext());
2406 return AssumptionContext;
2409 void Scop::simplifyContexts() {
2410 // The parameter constraints of the iteration domains give us a set of
2411 // constraints that need to hold for all cases where at least a single
2412 // statement iteration is executed in the whole scop. We now simplify the
2413 // assumed context under the assumption that such constraints hold and at
2414 // least a single statement iteration is executed. For cases where no
2415 // statement instances are executed, the assumptions we have taken about
2416 // the executed code do not matter and can be changed.
2418 // WARNING: This only holds if the assumptions we have taken do not reduce
2419 // the set of statement instances that are executed. Otherwise we
2420 // may run into a case where the iteration domains suggest that
2421 // for a certain set of parameter constraints no code is executed,
2422 // but in the original program some computation would have been
2423 // performed. In such a case, modifying the run-time conditions and
2424 // possibly influencing the run-time check may cause certain scops
2425 // to not be executed.
2427 // Example:
2429 // When delinearizing the following code:
2431 // for (long i = 0; i < 100; i++)
2432 // for (long j = 0; j < m; j++)
2433 // A[i+p][j] = 1.0;
2435 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2436 // otherwise we would access out of bound data. Now, knowing that code is
2437 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2438 AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
2439 InvalidContext = isl_set_align_params(InvalidContext, getParamSpace());
2442 /// Add the minimal/maximal access in @p Set to @p User.
2443 static isl::stat
2444 buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
2445 isl::pw_multi_aff MinPMA, MaxPMA;
2446 isl::pw_aff LastDimAff;
2447 isl::aff OneAff;
2448 unsigned Pos;
2449 isl::ctx Ctx = Set.get_ctx();
2451 Set = Set.remove_divs();
2453 if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
2454 return isl::stat::error;
2456 // Restrict the number of parameters involved in the access as the lexmin/
2457 // lexmax computation will take too long if this number is high.
2459 // Experiments with a simple test case using an i7 4800MQ:
2461 // #Parameters involved | Time (in sec)
2462 // 6 | 0.01
2463 // 7 | 0.04
2464 // 8 | 0.12
2465 // 9 | 0.40
2466 // 10 | 1.54
2467 // 11 | 6.78
2468 // 12 | 30.38
2470 if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
2471 unsigned InvolvedParams = 0;
2472 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
2473 if (Set.involves_dims(isl::dim::param, u, 1))
2474 InvolvedParams++;
2476 if (InvolvedParams > RunTimeChecksMaxParameters)
2477 return isl::stat::error;
2480 if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
2481 return isl::stat::error;
2483 MinPMA = Set.lexmin_pw_multi_aff();
2484 MaxPMA = Set.lexmax_pw_multi_aff();
2486 if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
2487 return isl::stat::error;
2489 MinPMA = MinPMA.coalesce();
2490 MaxPMA = MaxPMA.coalesce();
2492 // Adjust the last dimension of the maximal access by one as we want to
2493 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2494 // we test during code generation might now point after the end of the
2495 // allocated array but we will never dereference it anyway.
2496 assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
2497 Pos = MaxPMA.dim(isl::dim::out) - 1;
2498 LastDimAff = MaxPMA.get_pw_aff(Pos);
2499 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
2500 OneAff = OneAff.add_constant_si(1);
2501 LastDimAff = LastDimAff.add(OneAff);
2502 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
2504 MinMaxAccesses.push_back(std::make_pair(MinPMA.copy(), MaxPMA.copy()));
2506 return isl::stat::ok;
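// An illustrative sketch (not from the source): for the access set
// { MemRef_A[i] : 0 <= i < n }, the minimal access is MemRef_A[0] and the
// maximal access becomes MemRef_A[n]: the lexicographic maximum
// MemRef_A[n - 1], adjusted by one so that the pair encloses the accessed
// region as a half-open interval.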
2509 static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
2510 isl_set *Domain = MA->getStatement()->getDomain();
2511 Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
2512 return isl_set_reset_tuple_id(Domain);
2515 /// Wrapper function to calculate minimal/maximal accesses to each array.
2516 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
2517 Scop::MinMaxVectorTy &MinMaxAccesses) {
2519 MinMaxAccesses.reserve(AliasGroup.size());
2521 isl::union_set Domains = give(S.getDomains());
2522 isl::union_map Accesses = isl::union_map::empty(give(S.getParamSpace()));
2524 for (MemoryAccess *MA : AliasGroup)
2525 Accesses = Accesses.add_map(give(MA->getAccessRelation()));
2527 Accesses = Accesses.intersect_domain(Domains);
2528 isl::union_set Locations = Accesses.range();
2529 Locations = Locations.coalesce();
2530 Locations = Locations.detect_equalities();
2532 auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
2533 return buildMinMaxAccess(Set, MinMaxAccesses, S);
2535 return Locations.foreach_set(Lambda) == isl::stat::ok;
2538 /// Helper to treat non-affine regions and basic blocks the same.
2540 ///{
2542 /// Return the block that is the representing block for @p RN.
2543 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2544 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2545 : RN->getNodeAs<BasicBlock>();
2548 /// Return the @p idx'th block that is executed after @p RN.
2549 static inline BasicBlock *
2550 getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2551 if (RN->isSubRegion()) {
2552 assert(idx == 0);
2553 return RN->getNodeAs<Region>()->getExit();
2555 return TI->getSuccessor(idx);
2558 /// Return the smallest loop surrounding @p RN.
2559 static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
2560 if (!RN->isSubRegion()) {
2561 BasicBlock *BB = RN->getNodeAs<BasicBlock>();
2562 Loop *L = LI.getLoopFor(BB);
2564 // Unreachable statements are not considered to belong to an LLVM loop, as
2565 // they are not part of an actual loop in the control flow graph.
2566 // Nevertheless, we handle certain unreachable statements that are common
2567 // when modeling run-time bounds checks as being part of the loop to be
2568 // able to model them and to later eliminate the run-time bounds checks.
2570 // Specifically, for basic blocks that terminate in an unreachable and
2571 // where the immediate predecessor is part of a loop, we assume these
2572 // basic blocks belong to the loop the predecessor belongs to. This
2573 // allows us to model the following code.
2575 // for (i = 0; i < N; i++) {
2576 // if (i > 1024)
2577 // abort(); <- this abort might be translated to an
2578 // unreachable
2580 // A[i] = ...
2581 // }
2582 if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
2583 L = LI.getLoopFor(BB->getPrevNode());
2584 return L;
2587 Region *NonAffineSubRegion = RN->getNodeAs<Region>();
2588 Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
2589 while (L && NonAffineSubRegion->contains(L))
2590 L = L->getParentLoop();
2591 return L;
2594 /// Get the number of blocks in @p L.
2596 /// The number of blocks in a loop is the number of basic blocks actually
2597 /// belonging to the loop, as well as all single basic blocks that the loop
2598 /// exits to and which terminate in an unreachable instruction. We do not
2599 /// allow such basic blocks in the exit of a scop, hence they belong to the
2600 /// scop and represent run-time conditions which we want to model and
2601 /// subsequently speculate away.
2603 /// @see getRegionNodeLoop for additional details.
2604 unsigned getNumBlocksInLoop(Loop *L) {
2605 unsigned NumBlocks = L->getNumBlocks();
2606 SmallVector<llvm::BasicBlock *, 4> ExitBlocks;
2607 L->getExitBlocks(ExitBlocks);
2609 for (auto ExitBlock : ExitBlocks) {
2610 if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2611 NumBlocks++;
2613 return NumBlocks;
2616 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2617 if (!RN->isSubRegion())
2618 return 1;
2620 Region *R = RN->getNodeAs<Region>();
2621 return std::distance(R->block_begin(), R->block_end());
2624 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2625 const DominatorTree &DT) {
2626 if (!RN->isSubRegion())
2627 return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2628 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2629 if (isErrorBlock(*BB, R, LI, DT))
2630 return true;
2631 return false;
2634 ///}
2636 static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
2637 unsigned Dim, Loop *L) {
2638 Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
2639 isl_id *DimId =
2640 isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
2641 return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
2644 __isl_give isl_set *Scop::getDomainConditions(const ScopStmt *Stmt) const {
2645 return getDomainConditions(Stmt->getEntryBlock());
2648 __isl_give isl_set *Scop::getDomainConditions(BasicBlock *BB) const {
2649 auto DIt = DomainMap.find(BB);
2650 if (DIt != DomainMap.end())
2651 return DIt->getSecond().copy();
2653 auto &RI = *R.getRegionInfo();
2654 auto *BBR = RI.getRegionFor(BB);
2655 while (BBR->getEntry() == BB)
2656 BBR = BBR->getParent();
2657 return getDomainConditions(BBR->getEntry());
2660 bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
2661 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2663 bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
2664 auto *EntryBB = R->getEntry();
2665 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
2666 int LD = getRelativeLoopDepth(L);
2667 auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));
2669 while (LD-- >= 0) {
2670 S = addDomainDimId(S, LD + 1, L);
2671 L = L->getParentLoop();
2674 InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
2675 DomainMap[EntryBB] = isl::manage(S);
2677 if (IsOnlyNonAffineRegion)
2678 return !containsErrorBlock(R->getNode(), *R, LI, DT);
2680 if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
2681 return false;
2683 if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
2684 return false;
2686 // Error blocks and blocks dominated by them have been assumed to never be
2687 // executed. Representing them in the Scop does not add any value. In fact,
2688 // it is likely to cause issues during construction of the ScopStmts. The
2689 // contents of error blocks have not been verified to be expressible and
2690 // will cause problems when building up a ScopStmt for them.
2691 // Furthermore, basic blocks dominated by error blocks may reference
2692 // instructions in the error block which, if the error block is not modeled,
2693 // can themselves not be constructed properly. To this end we will replace
2694 // the domains of error blocks and those only reachable via error blocks
2695 // with an empty set. Additionally, we will record for each block under which
2696 // parameter combination it would be reached via an error block in its
2697 // InvalidDomain. This information is needed during load hoisting.
2698 if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
2699 return false;
2701 return true;
2704 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2705 /// to be compatible to domains constructed for loop @p NewL.
2707 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2708 /// edge from @p OldL to @p NewL.
2709 static __isl_give isl_set *adjustDomainDimensions(Scop &S,
2710 __isl_take isl_set *Dom,
2711 Loop *OldL, Loop *NewL) {
2713 // If the loops are the same there is nothing to do.
2714 if (NewL == OldL)
2715 return Dom;
2717 int OldDepth = S.getRelativeLoopDepth(OldL);
2718 int NewDepth = S.getRelativeLoopDepth(NewL);
2719 // If both loops are non-affine loops there is nothing to do.
2720 if (OldDepth == -1 && NewDepth == -1)
2721 return Dom;
2723 // Distinguish three cases:
2724 // 1) The depth is the same but the loops are not.
2725 // => One loop was left one was entered.
2726 // 2) The depth increased from OldL to NewL.
2727 // => One loop was entered, none was left.
2728 // 3) The depth decreased from OldL to NewL.
2729 // => Loops were left; the difference of the depths defines how many.
2730 if (OldDepth == NewDepth) {
2731 assert(OldL->getParentLoop() == NewL->getParentLoop());
2732 Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
2733 Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2734 Dom = addDomainDimId(Dom, NewDepth, NewL);
2735 } else if (OldDepth < NewDepth) {
2736 assert(OldDepth + 1 == NewDepth);
2737 auto &R = S.getRegion();
2738 (void)R;
2739 assert(NewL->getParentLoop() == OldL ||
2740 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
2741 Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2742 Dom = addDomainDimId(Dom, NewDepth, NewL);
2743 } else {
2744 assert(OldDepth > NewDepth);
2745 int Diff = OldDepth - NewDepth;
2746 int NumDim = isl_set_n_dim(Dom);
2747 assert(NumDim >= Diff);
2748 Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
2751 return Dom;
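// An illustrative sketch (not from the source), with relative loop depths in
// brackets:
// 1) Same depth, different loops (left j[1], entered k[1]): [i, j] becomes
//    [i, k'] with the dimension for j replaced by a fresh one for k.
// 2) Depth increased (entered j[1] below i[0]): [i] becomes [i, j'].
// 3) Depth decreased (left k[2] and j[1] back to i[0]): [i, j, k] becomes
//    [i].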
2754 bool Scop::propagateInvalidStmtDomains(
2755 Region *R, DominatorTree &DT, LoopInfo &LI,
2756 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2758 ReversePostOrderTraversal<Region *> RTraversal(R);
2759 for (auto *RN : RTraversal) {
2761 // Recurse for affine subregions but go on for basic blocks and non-affine
2762 // subregions.
2763 if (RN->isSubRegion()) {
2764 Region *SubRegion = RN->getNodeAs<Region>();
2765 if (!isNonAffineSubRegion(SubRegion)) {
2766 propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
2767 continue;
2771 bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
2772 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2773 isl::set &Domain = DomainMap[BB];
2774 assert(Domain && "Cannot propagate a nullptr");
2776 isl::set InvalidDomain = InvalidDomainMap[BB];
2778 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
2780 if (!IsInvalidBlock) {
2781 InvalidDomain = InvalidDomain.intersect(Domain);
2782 } else {
2783 InvalidDomain = Domain;
2784 isl::set DomPar = Domain.params();
2785 recordAssumption(ERRORBLOCK, DomPar.release(),
2786 BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
2787 Domain = nullptr;
2790 if (InvalidDomain.is_empty()) {
2791 InvalidDomainMap[BB] = InvalidDomain;
2792 continue;
2795 auto *BBLoop = getRegionNodeLoop(RN, LI);
2796 auto *TI = BB->getTerminator();
2797 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
2798 for (unsigned u = 0; u < NumSuccs; u++) {
2799 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2801 // Skip successors outside the SCoP.
2802 if (!contains(SuccBB))
2803 continue;
2805 // Skip backedges.
2806 if (DT.dominates(SuccBB, BB))
2807 continue;
2809 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2811 auto *AdjustedInvalidDomain = adjustDomainDimensions(
2812 *this, InvalidDomain.copy(), BBLoop, SuccBBLoop);
2814 auto *SuccInvalidDomain = InvalidDomainMap[SuccBB].copy();
2815 SuccInvalidDomain =
2816 isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
2817 SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
2818 unsigned NumConjuncts = isl_set_n_basic_set(SuccInvalidDomain);
2820 InvalidDomainMap[SuccBB] = isl::manage(SuccInvalidDomain);
2822 // Check if the maximal number of domain disjunctions was reached.
2823 // In case this happens we will bail.
2824 if (NumConjuncts < MaxDisjunctsInDomain)
2825 continue;
2827 InvalidDomainMap.erase(BB);
2828 invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
2829 return false;
2832 InvalidDomainMap[BB] = InvalidDomain;
2835 return true;
2838 void Scop::propagateDomainConstraintsToRegionExit(
2839 BasicBlock *BB, Loop *BBLoop,
2840 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
2841 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2843 // Check if the block @p BB is the entry of a region. If so we propagate its
2844 // domain to the exit block of the region. Otherwise we are done.
2845 auto *RI = R.getRegionInfo();
2846 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
2847 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
2848 if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
2849 return;
2851 // Do not propagate the domain if there is a loop backedge inside the region
2852 // that would prevent the exit block from being executed.
2853 auto *L = BBLoop;
2854 while (L && contains(L)) {
2855 SmallVector<BasicBlock *, 4> LatchBBs;
2856 L->getLoopLatches(LatchBBs);
2857 for (auto *LatchBB : LatchBBs)
2858 if (BB != LatchBB && BBReg->contains(LatchBB))
2859 return;
2860 L = L->getParentLoop();
2863 isl::set Domain = DomainMap[BB];
2864 assert(Domain && "Cannot propagate a nullptr");
2866 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());
2868 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2869 // adjust the domain before we can propagate it.
2870 isl::set AdjustedDomain = isl::manage(
2871 adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop));
2872 isl::set &ExitDomain = DomainMap[ExitBB];
2874 // If the exit domain is not yet created we set it; otherwise we "add" the
2875 // current domain.
2876 ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
2878 // Initialize the invalid domain.
2879 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
2881 FinishedExitBlocks.insert(ExitBB);
2884 bool Scop::buildDomainsWithBranchConstraints(
2885 Region *R, DominatorTree &DT, LoopInfo &LI,
2886 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2888 // To create the domain for each block in R we iterate over all blocks and
2889 // subregions in R and propagate the conditions under which the current region
2890 // element is executed. To this end we iterate in reverse post order over R as
2891 // it ensures that we first visit all predecessors of a region node (either a
2892 // basic block or a subregion) before we visit the region node itself.
2893 // Initially, only the domain for the SCoP region entry block is set and from
2894 // there we propagate the current domain to all successors; however, we add
2895 // the condition that the successor is actually executed next.
2896 // As we are only interested in non-loop carried constraints here we can
2897 // simply skip loop back edges.
2899 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
2900 ReversePostOrderTraversal<Region *> RTraversal(R);
2901 for (auto *RN : RTraversal) {
2903 // Recurse for affine subregions but go on for basic blocks and non-affine
2904 // subregions.
2905 if (RN->isSubRegion()) {
2906 Region *SubRegion = RN->getNodeAs<Region>();
2907 if (!isNonAffineSubRegion(SubRegion)) {
2908 if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
2909 InvalidDomainMap))
2910 return false;
2911 continue;
2915 if (containsErrorBlock(RN, getRegion(), LI, DT))
2916 HasErrorBlock = true;
2918 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2919 TerminatorInst *TI = BB->getTerminator();
2921 if (isa<UnreachableInst>(TI))
2922 continue;
2924 isl::set Domain = DomainMap.lookup(BB);
2925 if (!Domain)
2926 continue;
2927 MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));
2929 auto *BBLoop = getRegionNodeLoop(RN, LI);
2930 // Propagate the domain from BB directly to blocks that have a superset
2931 // domain, at the moment only region exit nodes of regions that start in BB.
2932 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
2933 InvalidDomainMap);
2935 // If all successors of BB have been assigned a domain by the propagation
2936 // above we do not need to build condition sets but can just skip this
2937 // block. However, it is important to note that this is a local property
2938 // with regards to the region @p R. To this end FinishedExitBlocks is a
2939 // local variable.
2940 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
2941 return FinishedExitBlocks.count(SuccBB);
2943 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
2944 continue;
2946 // Build the condition sets for the successor nodes of the current region
2947 // node. If it is a non-affine subregion we will always execute the single
2948 // exit node, hence the single entry node domain is the condition set. For
2949 // basic blocks we use the helper function buildConditionSets.
2950 SmallVector<isl_set *, 8> ConditionSets;
2951 if (RN->isSubRegion())
2952 ConditionSets.push_back(Domain.copy());
2953 else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
2954 InvalidDomainMap, ConditionSets))
2955 return false;
2957 // Now iterate over the successors and set their initial domain based on
2958 // their condition set. We skip back edges here and have to be careful when
2959 // we leave a loop not to keep constraints over a dimension that doesn't
2960 // exist anymore.
2961 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
2962 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
2963 isl::set CondSet = isl::manage(ConditionSets[u]);
2964 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2966 // Skip blocks outside the region.
2967 if (!contains(SuccBB))
2968 continue;
2970 // If we propagate the domain of some block to "SuccBB" we do not have to
2971 // adjust the domain.
2972 if (FinishedExitBlocks.count(SuccBB))
2973 continue;
2975 // Skip back edges.
2976 if (DT.dominates(SuccBB, BB))
2977 continue;
2979 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2981 CondSet = isl::manage(
2982 adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop));
2984 // Set the domain for the successor or merge it with an existing domain in
2985 // case there are multiple paths (without loop back edges) to the
2986 // successor block.
2987 isl::set &SuccDomain = DomainMap[SuccBB];
2989 if (SuccDomain) {
2990 SuccDomain = SuccDomain.unite(CondSet).coalesce();
2991 } else {
2992 // Initialize the invalid domain.
2993 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
2994 SuccDomain = CondSet;
2997 SuccDomain = SuccDomain.detect_equalities();
2999 // Check if the maximal number of domain disjunctions was reached.
3000 // In case this happens we will clean up and bail.
3001 if (isl_set_n_basic_set(SuccDomain.get()) < MaxDisjunctsInDomain)
3002 continue;
3004 invalidate(COMPLEXITY, DebugLoc());
3005 while (++u < ConditionSets.size())
3006 isl_set_free(ConditionSets[u]);
3007 return false;
3011 return true;
3014 __isl_give isl_set *
3015 Scop::getPredecessorDomainConstraints(BasicBlock *BB,
3016 __isl_keep isl_set *Domain,
3017 DominatorTree &DT, LoopInfo &LI) {
3018 // If @p BB is the ScopEntry we are done
3019 if (R.getEntry() == BB)
3020 return isl_set_universe(isl_set_get_space(Domain));
3022 // The region info of this function.
3023 auto &RI = *R.getRegionInfo();
3025 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
3027 // A domain to collect all predecessor domains, thus all conditions under
3028 // which the block is executed. To this end we start with the empty domain.
3029 isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain));
3031 // Set of regions whose entry-block domain has been propagated to BB. All
3032 // predecessors inside any of these regions can be skipped.
3033 SmallSet<Region *, 8> PropagatedRegions;
3035 for (auto *PredBB : predecessors(BB)) {
3036 // Skip backedges.
3037 if (DT.dominates(BB, PredBB))
3038 continue;
3040 // If the predecessor is in a region we used for propagation we can skip it.
3041 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
3042 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
3043 PredBBInRegion)) {
3044 continue;
3047 // Check if there is a valid region we can use for propagation, thus look
3048 // for a region that contains the predecessor and has @p BB as exit block.
3049 auto *PredR = RI.getRegionFor(PredBB);
3050 while (PredR->getExit() != BB && !PredR->contains(BB))
3051 PredR = PredR->getParent();
3053 // If a valid region for propagation was found use the entry of that region
3054 // for propagation, otherwise the PredBB directly.
3055 if (PredR->getExit() == BB) {
3056 PredBB = PredR->getEntry();
3057 PropagatedRegions.insert(PredR);
3060 auto *PredBBDom = getDomainConditions(PredBB);
3061 Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
3063 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
3065 PredDom = isl_set_union(PredDom, PredBBDom);
3068 return PredDom;
3071 bool Scop::propagateDomainConstraints(
3072 Region *R, DominatorTree &DT, LoopInfo &LI,
3073 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3074 // Iterate over the region R and propagate the domain constraints from the
3075 // predecessors to the current node. In contrast to the
3076 // buildDomainsWithBranchConstraints function, this one will pull the domain
3077 // information from the predecessors instead of pushing it to the successors.
3078 // Additionally, we assume the domains to be already present in the domain
3079 // map here. However, we iterate again in reverse post order so we know all
3080 // predecessors have been visited before a block or non-affine subregion is
3081 // visited.
3083 ReversePostOrderTraversal<Region *> RTraversal(R);
3084 for (auto *RN : RTraversal) {
3086 // Recurse for affine subregions but go on for basic blocks and non-affine
3087 // subregions.
3088 if (RN->isSubRegion()) {
3089 Region *SubRegion = RN->getNodeAs<Region>();
3090 if (!isNonAffineSubRegion(SubRegion)) {
3091 if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
3092 return false;
3093 continue;
3097 BasicBlock *BB = getRegionNodeBasicBlock(RN);
3098 isl::set &Domain = DomainMap[BB];
3099 assert(Domain);
3101 // Under the union of all predecessor conditions we can reach this block.
3102 isl::set PredDom =
3103 isl::manage(getPredecessorDomainConstraints(BB, Domain.get(), DT, LI));
3104 Domain = Domain.intersect(PredDom).coalesce();
3105 Domain = Domain.align_params(isl::manage(getParamSpace()));
3107 Loop *BBLoop = getRegionNodeLoop(RN, LI);
3108 if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
3109 if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
3110 return false;
3113 return true;
3116 /// Create a map to map from a given iteration to a subsequent iteration.
3118 /// This map maps from SetSpace -> SetSpace where dimension @p Dim
3119 /// is incremented by one and all other dimensions are equal, e.g.,
3120 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3122 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3123 static __isl_give isl_map *
3124 createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
3125 auto *MapSpace = isl_space_map_from_set(SetSpace);
3126 auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
3127 for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
3128 if (u != Dim)
3129 NextIterationMap =
3130 isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
3131 auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
3132 C = isl_constraint_set_constant_si(C, 1);
3133 C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
3134 C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
3135 NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
3136 return NextIterationMap;
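// A minimal sketch of what the helper above produces, assuming a
// 4-dimensional set space and Dim = 2 (all names are illustrative only):
//
//   isl_space *Space = isl_space_set_alloc(Ctx, 0, 4);
//   isl_map *Next = createNextIterationMap(Space, 2);
//   // Next == { [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] }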
3139 bool Scop::addLoopBoundsToHeaderDomain(
3140 Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3141 int LoopDepth = getRelativeLoopDepth(L);
3142 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
3144 BasicBlock *HeaderBB = L->getHeader();
3145 assert(DomainMap.count(HeaderBB));
3146 isl::set &HeaderBBDom = DomainMap[HeaderBB];
3148 isl::map NextIterationMap = isl::manage(
3149 createNextIterationMap(HeaderBBDom.get_space().release(), LoopDepth));
3151 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
3153 SmallVector<llvm::BasicBlock *, 4> LatchBlocks;
3154 L->getLoopLatches(LatchBlocks);
3156 for (BasicBlock *LatchBB : LatchBlocks) {
3158 // If the latch is only reachable via error statements we skip it.
3159 isl::set LatchBBDom = DomainMap.lookup(LatchBB);
3160 if (!LatchBBDom)
3161 continue;
3163 isl::set BackedgeCondition = nullptr;
3165 TerminatorInst *TI = LatchBB->getTerminator();
3166 BranchInst *BI = dyn_cast<BranchInst>(TI);
3167 assert(BI && "Only branch instructions allowed in loop latches");
3169 if (BI->isUnconditional())
3170 BackedgeCondition = LatchBBDom;
3171 else {
3172 SmallVector<isl_set *, 8> ConditionSets;
3173 int idx = BI->getSuccessor(0) != HeaderBB;
3174 if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
3175 InvalidDomainMap, ConditionSets))
3176 return false;
3178 // Free the non back edge condition set as we do not need it.
3179 isl_set_free(ConditionSets[1 - idx]);
3181 BackedgeCondition = isl::manage(ConditionSets[idx]);
3184 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3185 assert(LatchLoopDepth >= LoopDepth);
3186 BackedgeCondition = BackedgeCondition.project_out(
3187 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3188 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3191 isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
3192 for (int i = 0; i < LoopDepth; i++)
3193 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3195 isl::set UnionBackedgeConditionComplement =
3196 UnionBackedgeCondition.complement();
3197 UnionBackedgeConditionComplement =
3198 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
3199 1);
3200 UnionBackedgeConditionComplement =
3201 UnionBackedgeConditionComplement.apply(ForwardMap);
3202 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3203 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3205 auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth);
3206 HeaderBBDom = isl::manage(Parts.second);
3208 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3209 // the bounded assumptions to the context as they are already implied by the
3210 // <nsw> tag.
3211 if (Affinator.hasNSWAddRecForLoop(L)) {
3212 isl_set_free(Parts.first);
3213 return true;
3216 isl_set *UnboundedCtx = isl_set_params(Parts.first);
3217 recordAssumption(INFINITELOOP, UnboundedCtx,
3218 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3219 return true;
3222 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3223 Value *PointerBase = MA->getOriginalBaseAddr();
3225 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3226 if (!PointerBaseInst)
3227 return nullptr;
3229 auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3230 if (!BasePtrStmt)
3231 return nullptr;
3233 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3236 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3237 isl::union_map Writes) {
3238 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3239 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3242 Value *BaseAddr = MA->getOriginalBaseAddr();
3243 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3244 if (!isa<LoadInst>(BasePtrInst))
3245 return contains(BasePtrInst);
3247 return false;
3250 bool Scop::buildAliasChecks(AliasAnalysis &AA) {
3251 if (!PollyUseRuntimeAliasChecks)
3252 return true;
3254 if (buildAliasGroups(AA)) {
3255 // Aliasing assumptions do not go through addAssumption but we still want to
3256 // collect statistics so we do it here explicitly.
3257 if (MinMaxAliasGroups.size())
3258 AssumptionsAliasing++;
3259 return true;
3262 // If a problem occurs while building the alias groups we need to delete
3263 // this SCoP and pretend it wasn't valid in the first place. To this end
3264 // we make the assumed context infeasible.
3265 invalidate(ALIASING, DebugLoc());
3267 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3268 << " could not be created as the number of parameters involved "
3269 "is too high. The SCoP will be "
3270 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3271 "the maximal number of parameters but be advised that the "
3272 "compile time might increase exponentially.\n\n");
3273 return false;
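// For reference, the limit mentioned in the message above corresponds to the
// -polly-rtc-max-parameters command line option; an illustrative invocation
// would be:
//
//   opt -polly-scops -polly-rtc-max-parameters=16 input.ll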
3276 std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3277 Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
3278 AliasSetTracker AST(AA);
3280 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3281 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3282 for (ScopStmt &Stmt : *this) {
3284 isl_set *StmtDomain = Stmt.getDomain();
3285 bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
3286 isl_set_free(StmtDomain);
3288 // Statements with an empty domain will never be executed.
3289 if (StmtDomainEmpty)
3290 continue;
3292 for (MemoryAccess *MA : Stmt) {
3293 if (MA->isScalarKind())
3294 continue;
3295 if (!MA->isRead())
3296 HasWriteAccess.insert(MA->getScopArrayInfo());
3297 MemAccInst Acc(MA->getAccessInstruction());
3298 if (MA->isRead() && isa<MemTransferInst>(Acc))
3299 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3300 else
3301 PtrToAcc[Acc.getPointerOperand()] = MA;
3302 AST.add(Acc);
3306 AliasGroupVectorTy AliasGroups;
3307 for (AliasSet &AS : AST) {
3308 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3309 continue;
3310 AliasGroupTy AG;
3311 for (auto &PR : AS)
3312 AG.push_back(PtrToAcc[PR.getValue()]);
3313 if (AG.size() < 2)
3314 continue;
3315 AliasGroups.push_back(std::move(AG));
3318 return std::make_tuple(AliasGroups, HasWriteAccess);
3321 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3322 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3323 AliasGroupTy NewAG;
3324 AliasGroupTy &AG = AliasGroups[u];
3325 AliasGroupTy::iterator AGI = AG.begin();
3326 isl_set *AGDomain = getAccessDomain(*AGI);
3327 while (AGI != AG.end()) {
3328 MemoryAccess *MA = *AGI;
3329 isl_set *MADomain = getAccessDomain(MA);
3330 if (isl_set_is_disjoint(AGDomain, MADomain)) {
3331 NewAG.push_back(MA);
3332 AGI = AG.erase(AGI);
3333 isl_set_free(MADomain);
3334 } else {
3335 AGDomain = isl_set_union(AGDomain, MADomain);
3336 AGI++;
3339 if (NewAG.size() > 1)
3340 AliasGroups.push_back(std::move(NewAG));
3341 isl_set_free(AGDomain);
3345 bool Scop::buildAliasGroups(AliasAnalysis &AA) {
3346 // To create sound alias checks we perform the following steps:
3347 // o) We partition each group into read only and non read only accesses.
3348 // o) For each group with more than one base pointer we then compute minimal
3349 // and maximal accesses to each array of a group in read only and non
3350 // read only partitions separately.
3351 AliasGroupVectorTy AliasGroups;
3352 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3354 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
3356 splitAliasGroupsByDomain(AliasGroups);
3358 for (AliasGroupTy &AG : AliasGroups) {
3359 if (!hasFeasibleRuntimeContext())
3360 return false;
3363 IslMaxOperationsGuard MaxOpGuard(getIslCtx(), OptComputeOut);
3364 bool Valid = buildAliasGroup(AG, HasWriteAccess);
3365 if (!Valid)
3366 return false;
3368 if (isl_ctx_last_error(getIslCtx()) == isl_error_quota) {
3369 invalidate(COMPLEXITY, DebugLoc());
3370 return false;
3374 return true;
3377 bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
3378 DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3379 AliasGroupTy ReadOnlyAccesses;
3380 AliasGroupTy ReadWriteAccesses;
3381 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3382 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3384 if (AliasGroup.size() < 2)
3385 return true;
3387 for (MemoryAccess *Access : AliasGroup) {
3388 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3389 Access->getAccessInstruction())
3390 << "Possibly aliasing pointer, use restrict keyword.");
3391 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3392 if (HasWriteAccess.count(Array)) {
3393 ReadWriteArrays.insert(Array);
3394 ReadWriteAccesses.push_back(Access);
3395 } else {
3396 ReadOnlyArrays.insert(Array);
3397 ReadOnlyAccesses.push_back(Access);
3401 // If there are no read-only pointers and fewer than two read-write pointers,
3402 // no alias check is needed.
3403 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3404 return true;
3406 // If there is no read-write pointer, no alias check is needed.
3407 if (ReadWriteArrays.empty())
3408 return true;
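// Past this point the group contains at least one read-write array and either
// a second read-write array or at least one read-only access, so a run-time
// check is required.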
3410 // For non-affine accesses, no alias check can be generated as we cannot
3411 // compute a sufficiently tight lower and upper bound: bail out.
3412 for (MemoryAccess *MA : AliasGroup) {
3413 if (!MA->isAffine()) {
3414 invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3415 MA->getAccessInstruction()->getParent());
3416 return false;
3420 // Ensure that for all memory accesses for which we generate alias checks,
3421 // their base pointers are available.
3422 for (MemoryAccess *MA : AliasGroup) {
3423 if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
3424 addRequiredInvariantLoad(
3425 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3428 MinMaxAliasGroups.emplace_back();
3429 MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
3430 MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
3431 MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
3433 bool Valid;
3435 Valid =
3436 calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
3438 if (!Valid)
3439 return false;
3441 // Bail out if the number of values we need to compare is too large.
3442 // This is important as the number of comparisons grows quadratically with
3443 // the number of values we need to compare.
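// For example, a group of 20 arrays already requires 20 * 19 / 2 = 190
// pairwise comparisons.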
3444 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3445 RunTimeChecksMaxArraysPerGroup)
3446 return false;
3448 Valid =
3449 calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
3451 if (!Valid)
3452 return false;
3454 return true;
3457 /// Get the smallest loop that contains @p S but is not in @p S.
3458 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3459 // Start with the smallest loop containing the entry and expand that
3460 // loop until it contains all blocks in the region. If there is a loop
3461 // containing all blocks in the region check if it is itself contained
3462 // and if so take the parent loop as it will be the smallest containing
3463 // the region but not contained by it.
3464 Loop *L = LI.getLoopFor(S.getEntry());
3465 while (L) {
3466 bool AllContained = true;
3467 for (auto *BB : S.blocks())
3468 AllContained &= L->contains(BB);
3469 if (AllContained)
3470 break;
3471 L = L->getParentLoop();
3474 return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
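// Example (hypothetical): for a scop that covers exactly the blocks of loop j
// in a nest "for (i) { for (j) { S } }", the search starts at loop j. Since j
// is itself contained in the scop, its parent, loop i, is returned as the
// loop surrounding the scop.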
3477 int Scop::NextScopID = 0;
3479 std::string Scop::CurrentFunc = "";
3481 int Scop::getNextID(std::string ParentFunc) {
3482 if (ParentFunc != CurrentFunc) {
3483 CurrentFunc = ParentFunc;
3484 NextScopID = 0;
3486 return NextScopID++;
3489 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3490 ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE)
3491 : SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
3492 HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
3493 MaxLoopDepth(0), CopyStmtsNum(0), SkipScop(false), DC(DC), ORE(ORE),
3494 IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
3495 Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
3496 Schedule(nullptr),
3497 ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
3498 if (IslOnErrorAbort)
3499 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
3500 buildContext();
3503 void Scop::foldSizeConstantsToRight() {
3504 isl_union_set *Accessed = isl_union_map_range(getAccesses());
3506 for (auto Array : arrays()) {
3507 if (Array->getNumberOfDimensions() <= 1)
3508 continue;
3510 isl_space *Space = Array->getSpace();
3512 Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
3514 if (!isl_union_set_contains(Accessed, Space)) {
3515 isl_space_free(Space);
3516 continue;
3519 isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
3521 isl_map *Transform =
3522 isl_map_universe(isl_space_map_from_set(Array->getSpace()));
3524 std::vector<int> Int;
3526 int Dims = isl_set_dim(Elements, isl_dim_set);
3527 for (int i = 0; i < Dims; i++) {
3528 isl_set *DimOnly =
3529 isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3530 DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3531 DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3533 isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
3535 if (i == Dims - 1) {
3536 Int.push_back(1);
3537 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3538 isl_basic_set_free(DimHull);
3539 continue;
3542 if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3543 isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3544 isl_val *Val = isl_aff_get_denominator_val(Diff);
3545 isl_aff_free(Diff);
3547 int ValInt = 1;
3549 if (isl_val_is_int(Val))
3550 ValInt = isl_val_get_num_si(Val);
3551 isl_val_free(Val);
3553 Int.push_back(ValInt);
3555 isl_constraint *C = isl_constraint_alloc_equality(
3556 isl_local_space_from_space(isl_map_get_space(Transform)));
3557 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3558 C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3559 Transform = isl_map_add_constraint(Transform, C);
3560 isl_basic_set_free(DimHull);
3561 continue;
3564 isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3565 ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3567 int ValInt = 1;
3568 if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3569 ValInt = 0;
3572 Int.push_back(ValInt);
3573 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3574 isl_basic_set_free(DimHull);
3575 isl_basic_set_free(ZeroSet);
3578 isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3580 if (!isl_set_is_subset(Elements, MappedElements)) {
3581 isl_set_free(Elements);
3582 isl_set_free(MappedElements);
3583 isl_map_free(Transform);
3584 continue;
3587 isl_set_free(MappedElements);
3589 bool CanFold = true;
3591 if (Int[0] <= 1)
3592 CanFold = false;
3594 unsigned NumDims = Array->getNumberOfDimensions();
3595 for (unsigned i = 1; i < NumDims - 1; i++)
3596 if (Int[0] != Int[i] && Int[i])
3597 CanFold = false;
3599 if (!CanFold) {
3600 isl_set_free(Elements);
3601 isl_map_free(Transform);
3602 continue;
3605 for (auto &Access : AccessFunctions)
3606 if (Access->getScopArrayInfo() == Array)
3607 Access->setAccessRelation(isl_map_apply_range(
3608 Access->getAccessRelation(), isl_map_copy(Transform)));
3610 isl_map_free(Transform);
3612 std::vector<const SCEV *> Sizes;
3613 for (unsigned i = 0; i < NumDims; i++) {
3614 auto Size = Array->getDimensionSize(i);
3616 if (i == NumDims - 1)
3617 Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3618 Sizes.push_back(Size);
3621 Array->updateSizes(Sizes, false /* CheckConsistency */);
3623 isl_set_free(Elements);
3625 isl_union_set_free(Accessed);
3626 return;
3629 void Scop::markFortranArrays() {
3630 for (ScopStmt &Stmt : Stmts) {
3631 for (MemoryAccess *MemAcc : Stmt) {
3632 Value *FAD = MemAcc->getFortranArrayDescriptor();
3633 if (!FAD)
3634 continue;
3636 // TODO: const_cast-ing to edit
3637 ScopArrayInfo *SAI =
3638 const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3639 assert(SAI && "memory access into a Fortran array does not "
3640 "have an associated ScopArrayInfo");
3641 SAI->applyAndSetFAD(FAD);
3646 void Scop::finalizeAccesses() {
3647 updateAccessDimensionality();
3648 foldSizeConstantsToRight();
3649 foldAccessRelations();
3650 assumeNoOutOfBounds();
3651 markFortranArrays();
3654 Scop::~Scop() {
3655 isl_set_free(Context);
3656 isl_set_free(AssumedContext);
3657 isl_set_free(InvalidContext);
3658 isl_schedule_free(Schedule);
3660 for (auto &It : ParameterIds)
3661 isl_id_free(It.second);
3663 for (auto &AS : RecordedAssumptions)
3664 isl_set_free(AS.Set);
3666 // Free the alias groups
3667 for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
3668 for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
3669 isl_pw_multi_aff_free(MMA.first);
3670 isl_pw_multi_aff_free(MMA.second);
3672 for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
3673 isl_pw_multi_aff_free(MMA.first);
3674 isl_pw_multi_aff_free(MMA.second);
3678 for (const auto &IAClass : InvariantEquivClasses)
3679 isl_set_free(IAClass.ExecutionContext);
3681 // Explicitly release all Scop objects and the underlying isl objects before
3682 // we release the isl context.
3683 Stmts.clear();
3684 ScopArrayInfoSet.clear();
3685 ScopArrayInfoMap.clear();
3686 ScopArrayNameMap.clear();
3687 AccessFunctions.clear();
3690 void Scop::updateAccessDimensionality() {
3691 // Check all array accesses for each base pointer and find a (virtual) element
3692 // size for the base pointer that divides all access functions.
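// For example, if an array of 8-byte elements is also accessed with a
// subscript that is only guaranteed to be a multiple of 4 (say, a mixed
// i64/i32 access pattern), DivisibleSize is halved from 8 to 4 below and the
// array element type is narrowed accordingly.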
3693 for (ScopStmt &Stmt : *this)
3694 for (MemoryAccess *Access : Stmt) {
3695 if (!Access->isArrayKind())
3696 continue;
3697 ScopArrayInfo *Array =
3698 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3700 if (Array->getNumberOfDimensions() != 1)
3701 continue;
3702 unsigned DivisibleSize = Array->getElemSizeInBytes();
3703 const SCEV *Subscript = Access->getSubscript(0);
3704 while (!isDivisible(Subscript, DivisibleSize, *SE))
3705 DivisibleSize /= 2;
3706 auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3707 Array->updateElementType(Ty);
3710 for (auto &Stmt : *this)
3711 for (auto &Access : Stmt)
3712 Access->updateDimensionality();
3715 void Scop::foldAccessRelations() {
3716 for (auto &Stmt : *this)
3717 for (auto &Access : Stmt)
3718 Access->foldAccessRelation();
3721 void Scop::assumeNoOutOfBounds() {
3722 for (auto &Stmt : *this)
3723 for (auto &Access : Stmt)
3724 Access->assumeNoOutOfBound();
3727 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3728 if (Stmt.isRegionStmt())
3729 for (BasicBlock *BB : Stmt.getRegion()->blocks())
3730 StmtMap.erase(BB);
3731 else
3732 StmtMap.erase(Stmt.getBasicBlock());
3735 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
3736 for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3737 if (!ShouldDelete(*StmtIt)) {
3738 StmtIt++;
3739 continue;
3742 removeFromStmtMap(*StmtIt);
3743 StmtIt = Stmts.erase(StmtIt);
3747 void Scop::removeStmtNotInDomainMap() {
3748 auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3749 return !this->DomainMap[Stmt.getEntryBlock()];
3751 removeStmts(ShouldDelete);
3754 void Scop::simplifySCoP(bool AfterHoisting) {
3756 auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3757 bool RemoveStmt = Stmt.isEmpty();
3759 // Remove read only statements only after invariant load hoisting.
3760 if (!RemoveStmt && AfterHoisting) {
3761 bool OnlyRead = true;
3762 for (MemoryAccess *MA : Stmt) {
3763 if (MA->isRead())
3764 continue;
3766 OnlyRead = false;
3767 break;
3770 RemoveStmt = OnlyRead;
3772 return RemoveStmt;
3775 removeStmts(ShouldDelete);
3778 InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3779 LoadInst *LInst = dyn_cast<LoadInst>(Val);
3780 if (!LInst)
3781 return nullptr;
3783 if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3784 LInst = cast<LoadInst>(Rep);
3786 Type *Ty = LInst->getType();
3787 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3788 for (auto &IAClass : InvariantEquivClasses) {
3789 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3790 continue;
3792 auto &MAs = IAClass.InvariantAccesses;
3793 for (auto *MA : MAs)
3794 if (MA->getAccessInstruction() == Val)
3795 return &IAClass;
3798 return nullptr;
3801 /// Check if @p MA can always be hoisted without execution context.
3802 static bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3803 bool MAInvalidCtxIsEmpty,
3804 bool NonHoistableCtxIsEmpty) {
3805 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3806 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
3807 // TODO: We can provide more information for better but more expensive
3808 // results.
3809 if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3810 LInst->getAlignment(), DL))
3811 return false;
3813 // If the location might be overwritten we do not hoist it unconditionally.
3815 // TODO: This is probably too conservative.
3816 if (!NonHoistableCtxIsEmpty)
3817 return false;
3819 // If a dereferenceable load is in a statement that is modeled precisely we
3820 // can hoist it.
3821 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3822 return true;
3824 // Even if the statement is not modeled precisely we can hoist the load if it
3825 // does not involve any parameters that might have been specialized by the
3826 // statement domain.
3827 for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3828 if (!isa<SCEVConstant>(MA->getSubscript(u)))
3829 return false;
3830 return true;
3833 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3835 if (InvMAs.empty())
3836 return;
3838 auto *StmtInvalidCtx = Stmt.getInvalidContext();
3839 bool StmtInvalidCtxIsEmpty = isl_set_is_empty(StmtInvalidCtx);
3841 // Get the context under which the statement is executed but remove the error
3842 // context under which this statement is reached.
3843 isl_set *DomainCtx = isl_set_params(Stmt.getDomain());
3844 DomainCtx = isl_set_subtract(DomainCtx, StmtInvalidCtx);
3846 if (isl_set_n_basic_set(DomainCtx) >= MaxDisjunctsInDomain) {
3847 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3848 invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3849 isl_set_free(DomainCtx);
3850 for (auto &InvMA : InvMAs)
3851 isl_set_free(InvMA.NonHoistableCtx);
3852 return;
3855 // Project out all parameters that relate to loads in the statement. Otherwise
3856 // we could have cyclic dependences on the constraints under which the
3857 // hoisted loads are executed and we could not determine an order in which to
3858 // pre-load them. This happens because not only lower bounds are part of the
3859 // domain but also upper bounds.
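// For instance (hypothetical): if hoisted load L1 defines a parameter that
// appears in the execution context of hoisted load L2, while L2 in turn
// defines a parameter constrained in L1's context, neither load could be
// emitted before the other; eliminating such parameters breaks the cycle.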
3860 for (auto &InvMA : InvMAs) {
3861 auto *MA = InvMA.MA;
3862 Instruction *AccInst = MA->getAccessInstruction();
3863 if (SE->isSCEVable(AccInst->getType())) {
3864 SetVector<Value *> Values;
3865 for (const SCEV *Parameter : Parameters) {
3866 Values.clear();
3867 findValues(Parameter, *SE, Values);
3868 if (!Values.count(AccInst))
3869 continue;
3871 if (isl_id *ParamId = getIdForParam(Parameter)) {
3872 int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId);
3873 if (Dim >= 0)
3874 DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1);
3875 isl_id_free(ParamId);
3881 for (auto &InvMA : InvMAs) {
3882 auto *MA = InvMA.MA;
3883 auto *NHCtx = InvMA.NonHoistableCtx;
3885 // Check for another invariant access that accesses the same location as
3886 // MA and if found consolidate them. Otherwise create a new equivalence
3887 // class at the end of InvariantEquivClasses.
3888 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3889 Type *Ty = LInst->getType();
3890 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3892 auto *MAInvalidCtx = MA->getInvalidContext();
3893 bool NonHoistableCtxIsEmpty = isl_set_is_empty(NHCtx);
3894 bool MAInvalidCtxIsEmpty = isl_set_is_empty(MAInvalidCtx);
3896 isl_set *MACtx;
3897 // Check if we know that this pointer can be speculatively accessed.
3898 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3899 NonHoistableCtxIsEmpty)) {
3900 MACtx = isl_set_universe(isl_set_get_space(DomainCtx));
3901 isl_set_free(MAInvalidCtx);
3902 isl_set_free(NHCtx);
3903 } else {
3904 MACtx = isl_set_copy(DomainCtx);
3905 MACtx = isl_set_subtract(MACtx, isl_set_union(MAInvalidCtx, NHCtx));
3906 MACtx = isl_set_gist_params(MACtx, getContext());
3909 bool Consolidated = false;
3910 for (auto &IAClass : InvariantEquivClasses) {
3911 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3912 continue;
3914 // If the pointer and the type are equal, check if the access function
3915 // w.r.t. the domain is equal too. It can happen that the domain fixes
3916 // parameter values and these can be different for distinct parts of the
3917 // SCoP. If this happens we cannot consolidate the loads but need to
3918 // create a new invariant load equivalence class.
3919 auto &MAs = IAClass.InvariantAccesses;
3920 if (!MAs.empty()) {
3921 auto *LastMA = MAs.front();
3923 auto *AR = isl_map_range(MA->getAccessRelation());
3924 auto *LastAR = isl_map_range(LastMA->getAccessRelation());
3925 bool SameAR = isl_set_is_equal(AR, LastAR);
3926 isl_set_free(AR);
3927 isl_set_free(LastAR);
3929 if (!SameAR)
3930 continue;
3933 // Add MA to the list of accesses that are in this class.
3934 MAs.push_front(MA);
3936 Consolidated = true;
3938 // Unify the execution context of the class and this statement.
3939 isl_set *&IAClassDomainCtx = IAClass.ExecutionContext;
3940 if (IAClassDomainCtx)
3941 IAClassDomainCtx =
3942 isl_set_coalesce(isl_set_union(IAClassDomainCtx, MACtx));
3943 else
3944 IAClassDomainCtx = MACtx;
3945 break;
3948 if (Consolidated)
3949 continue;
3951 // If we did not consolidate MA, thus did not find an equivalence class
3952 // for it, we create a new one.
3953 InvariantEquivClasses.emplace_back(
3954 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3957 isl_set_free(DomainCtx);
3960 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
3961 // TODO: Loads that are not loop carried, hence are in a statement with
3962 // zero iterators, are by construction invariant, though we
3963 // currently "hoist" them anyway. This is necessary because we allow
3964 // them to be treated as parameters (e.g., in conditions) and our code
3965 // generation would otherwise use the old value.
3967 auto &Stmt = *Access->getStatement();
3968 BasicBlock *BB = Stmt.getEntryBlock();
3970 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
3971 Access->isMemoryIntrinsic())
3972 return nullptr;
3974 // Skip accesses that have an invariant base pointer which is defined but
3975 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
3976 // returns a pointer that is used as a base address. However, as we want
3977 // to hoist indirect pointers, we allow the base pointer to be defined in
3978 // the region if it is also a memory access. Each ScopArrayInfo object
3979 // that has a base pointer origin has a base pointer that is loaded and
3980 // that it is invariant, thus it will be hoisted too. However, if there is
3981 // no base pointer origin we check that the base pointer is defined
3982 // outside the region.
3983 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
3984 if (hasNonHoistableBasePtrInScop(Access, Writes))
3985 return nullptr;
3987 isl::map AccessRelation = give(Access->getAccessRelation());
3988 assert(!AccessRelation.is_empty());
3990 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
3991 return nullptr;
3993 AccessRelation = AccessRelation.intersect_domain(give(Stmt.getDomain()));
3994 isl::set SafeToLoad;
3996 auto &DL = getFunction().getParent()->getDataLayout();
3997 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
3998 DL)) {
3999 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
4000 } else if (BB != LI->getParent()) {
4001 // Skip accesses in non-affine subregions as they might not be executed
4002 // under the same condition as the entry of the non-affine subregion.
4003 return nullptr;
4004 } else {
4005 SafeToLoad = AccessRelation.range();
4008 isl::union_map Written = Writes.intersect_range(SafeToLoad);
4009 isl::set WrittenCtx = Written.params();
4010 bool IsWritten = !WrittenCtx.is_empty();
4012 if (!IsWritten)
4013 return WrittenCtx;
4015 WrittenCtx = WrittenCtx.remove_divs();
4016 bool TooComplex =
4017 isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
4018 if (TooComplex || !isRequiredInvariantLoad(LI))
4019 return nullptr;
4021 addAssumption(INVARIANTLOAD, WrittenCtx.copy(), LI->getDebugLoc(),
4022 AS_RESTRICTION, LI->getParent());
4023 return WrittenCtx;
4026 void Scop::verifyInvariantLoads() {
4027 auto &RIL = getRequiredInvariantLoads();
4028 for (LoadInst *LI : RIL) {
4029 assert(LI && contains(LI));
4030 ScopStmt *Stmt = getStmtFor(LI);
4031 if (Stmt && Stmt->getArrayAccessOrNULLFor(LI)) {
4032 invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
4033 return;
4038 void Scop::hoistInvariantLoads() {
4039 if (!PollyInvariantLoadHoisting)
4040 return;
4042 isl::union_map Writes = give(getWrites());
4043 for (ScopStmt &Stmt : *this) {
4044 InvariantAccessesTy InvariantAccesses;
4046 for (MemoryAccess *Access : Stmt)
4047 if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
4048 InvariantAccesses.push_back({Access, NHCtx.release()});
4050 // Transfer the memory accesses from the statement to the SCoP.
4051 for (auto InvMA : InvariantAccesses)
4052 Stmt.removeMemoryAccess(InvMA.MA);
4053 addInvariantLoads(Stmt, InvariantAccesses);
4057 /// Find the canonical scop array info object for a set of invariant load
4058 /// hoisted loads. The canonical array is the one that corresponds to the
4059 /// first load in the list of accesses which is used as base pointer of a
4060 /// scop array.
4061 static const ScopArrayInfo *findCanonicalArray(Scop *S,
4062 MemoryAccessList &Accesses) {
4063 for (MemoryAccess *Access : Accesses) {
4064 const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
4065 Access->getAccessInstruction(), MemoryKind::Array);
4066 if (CanonicalArray)
4067 return CanonicalArray;
4069 return nullptr;
4072 /// Check if @p Array serves as the base array in an invariant load.
4073 static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
4074 for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
4075 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
4076 if (Access2->getScopArrayInfo() == Array)
4077 return true;
4078 return false;
4081 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4082 /// with a reference to @p New.
4083 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
4084 const ScopArrayInfo *New) {
4085 for (ScopStmt &Stmt : *S)
4086 for (MemoryAccess *Access : Stmt) {
4087 if (Access->getLatestScopArrayInfo() != Old)
4088 continue;
4090 isl_id *Id = New->getBasePtrId();
4091 isl_map *Map = Access->getAccessRelation();
4092 Map = isl_map_set_tuple_id(Map, isl_dim_out, Id);
4093 Access->setAccessRelation(Map);
4097 void Scop::canonicalizeDynamicBasePtrs() {
4098 for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
4099 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
4101 const ScopArrayInfo *CanonicalBasePtrSAI =
4102 findCanonicalArray(this, BasePtrAccesses);
4104 if (!CanonicalBasePtrSAI)
4105 continue;
4107 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
4108 const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
4109 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
4110 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
4111 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
4112 continue;
4114 // We currently do not canonicalize arrays where some accesses are
4115 // hoisted as invariant loads. If we would, we need to update the access
4116 // function of the invariant loads as well. However, as this is not a
4117 // very common situation, we leave this for now to avoid further
4118 // complexity increases.
4119 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
4120 continue;
4122 replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
4127 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
4128 ArrayRef<const SCEV *> Sizes,
4129 MemoryKind Kind,
4130 const char *BaseName) {
4131 assert((BasePtr || BaseName) &&
4132 "BasePtr and BaseName can not be nullptr at the same time.");
4133 assert(!(BasePtr && BaseName) && "BaseName is redundant.");
4134 auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
4135 : ScopArrayNameMap[BaseName];
4136 if (!SAI) {
4137 auto &DL = getFunction().getParent()->getDataLayout();
4138 SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
4139 DL, this, BaseName));
4140 ScopArrayInfoSet.insert(SAI.get());
4141 } else {
4142 SAI->updateElementType(ElementType);
4143 // In case of mismatching array sizes, we bail out by setting the run-time
4144 // context to false.
4145 if (!SAI->updateSizes(Sizes))
4146 invalidate(DELINEARIZATION, DebugLoc());
4148 return SAI.get();
4151 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
4152 const std::string &BaseName,
4153 const std::vector<unsigned> &Sizes) {
4154 auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
4155 std::vector<const SCEV *> SCEVSizes;
4157 for (auto size : Sizes)
4158 if (size)
4159 SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
4160 else
4161 SCEVSizes.push_back(nullptr);
4163 auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
4164 MemoryKind::Array, BaseName.c_str());
4165 return SAI;
4168 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
4169 MemoryKind Kind) {
4170 auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
4171 return SAI;
4174 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
4175 auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
4176 assert(SAI && "No ScopArrayInfo available for this base pointer");
4177 return SAI;
4180 std::string Scop::getContextStr() const { return stringFromIslObj(Context); }
4182 std::string Scop::getAssumedContextStr() const {
4183 assert(AssumedContext && "Assumed context not yet built");
4184 return stringFromIslObj(AssumedContext);
4187 std::string Scop::getInvalidContextStr() const {
4188 return stringFromIslObj(InvalidContext);
4191 std::string Scop::getNameStr() const {
4192 std::string ExitName, EntryName;
4193 std::tie(EntryName, ExitName) = getEntryExitStr();
4194 return EntryName + "---" + ExitName;
4197 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4198 std::string ExitName, EntryName;
4199 raw_string_ostream ExitStr(ExitName);
4200 raw_string_ostream EntryStr(EntryName);
4202 R.getEntry()->printAsOperand(EntryStr, false);
4203 EntryStr.str();
4205 if (R.getExit()) {
4206 R.getExit()->printAsOperand(ExitStr, false);
4207 ExitStr.str();
4208 } else
4209 ExitName = "FunctionExit";
4211 return std::make_pair(EntryName, ExitName);
4214 __isl_give isl_set *Scop::getContext() const { return isl_set_copy(Context); }
4215 __isl_give isl_space *Scop::getParamSpace() const {
4216 return isl_set_get_space(Context);
4219 __isl_give isl_set *Scop::getAssumedContext() const {
4220 assert(AssumedContext && "Assumed context not yet built");
4221 return isl_set_copy(AssumedContext);
4224 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4225 if (PollyProcessUnprofitable)
4226 return true;
4228 if (isEmpty())
4229 return false;
4231 unsigned OptimizableStmtsOrLoops = 0;
4232 for (auto &Stmt : *this) {
4233 if (Stmt.getNumIterators() == 0)
4234 continue;
4236 bool ContainsArrayAccs = false;
4237 bool ContainsScalarAccs = false;
4238 for (auto *MA : Stmt) {
4239 if (MA->isRead())
4240 continue;
4241 ContainsArrayAccs |= MA->isLatestArrayKind();
4242 ContainsScalarAccs |= MA->isLatestScalarKind();
4245 if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4246 OptimizableStmtsOrLoops += Stmt.getNumIterators();
4249 return OptimizableStmtsOrLoops > 1;
4252 bool Scop::hasFeasibleRuntimeContext() const {
4253 auto *PositiveContext = getAssumedContext();
4254 auto *NegativeContext = getInvalidContext();
4255 PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
4256 bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
4257 isl_set_is_subset(PositiveContext, NegativeContext));
4258 isl_set_free(PositiveContext);
4259 if (!IsFeasible) {
4260 isl_set_free(NegativeContext);
4261 return false;
4264 auto *DomainContext = isl_union_set_params(getDomains());
4265 IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
4266 IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
4267 isl_set_free(NegativeContext);
4268 isl_set_free(DomainContext);
4270 return IsFeasible;
4273 static std::string toString(AssumptionKind Kind) {
4274 switch (Kind) {
4275 case ALIASING:
4276 return "No-aliasing";
4277 case INBOUNDS:
4278 return "Inbounds";
4279 case WRAPPING:
4280 return "No-overflows";
4281 case UNSIGNED:
4282 return "Signed-unsigned";
4283 case COMPLEXITY:
4284 return "Low complexity";
4285 case PROFITABLE:
4286 return "Profitable";
4287 case ERRORBLOCK:
4288 return "No-error";
4289 case INFINITELOOP:
4290 return "Finite loop";
4291 case INVARIANTLOAD:
4292 return "Invariant load";
4293 case DELINEARIZATION:
4294 return "Delinearization";
4296 llvm_unreachable("Unknown AssumptionKind!");
4299 bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
4300 if (Sign == AS_ASSUMPTION) {
4301 if (isl_set_is_subset(Context, Set))
4302 return false;
4304 if (isl_set_is_subset(AssumedContext, Set))
4305 return false;
4306 } else {
4307 if (isl_set_is_disjoint(Set, Context))
4308 return false;
4310 if (isl_set_is_subset(Set, InvalidContext))
4311 return false;
4313 return true;
4316 bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
4317 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4318 if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
4319 return false;
4321 // Never emit trivial assumptions as they only clutter the output.
4322 if (!PollyRemarksMinimal) {
4323 isl_set *Univ = nullptr;
4324 if (Sign == AS_ASSUMPTION)
4325 Univ = isl_set_universe(isl_set_get_space(Set));
4327 bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
4328 (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
4329 isl_set_free(Univ);
4331 if (IsTrivial)
4332 return false;
4335 switch (Kind) {
4336 case ALIASING:
4337 AssumptionsAliasing++;
4338 break;
4339 case INBOUNDS:
4340 AssumptionsInbounds++;
4341 break;
4342 case WRAPPING:
4343 AssumptionsWrapping++;
4344 break;
4345 case UNSIGNED:
4346 AssumptionsUnsigned++;
4347 break;
4348 case COMPLEXITY:
4349 AssumptionsComplexity++;
4350 break;
4351 case PROFITABLE:
4352 AssumptionsUnprofitable++;
4353 break;
4354 case ERRORBLOCK:
4355 AssumptionsErrorBlock++;
4356 break;
4357 case INFINITELOOP:
4358 AssumptionsInfiniteLoop++;
4359 break;
4360 case INVARIANTLOAD:
4361 AssumptionsInvariantLoad++;
4362 break;
4363 case DELINEARIZATION:
4364 AssumptionsDelinearization++;
4365 break;
4368 auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4369 std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
4370 if (BB)
4371 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
4372 << Msg);
4373 else
4374 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
4375 R.getEntry())
4376 << Msg);
4377 return true;
4380 void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4381 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4382 // Simplify the assumptions/restrictions first.
4383 Set = isl_set_gist_params(Set, getContext());
4385 if (!trackAssumption(Kind, Set, Loc, Sign, BB)) {
4386 isl_set_free(Set);
4387 return;
4390 if (Sign == AS_ASSUMPTION) {
4391 AssumedContext = isl_set_intersect(AssumedContext, Set);
4392 AssumedContext = isl_set_coalesce(AssumedContext);
4393 } else {
4394 InvalidContext = isl_set_union(InvalidContext, Set);
4395 InvalidContext = isl_set_coalesce(InvalidContext);
4399 void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4400 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4401 assert((isl_set_is_params(Set) || BB) &&
4402 "Assumptions without a basic block must be parameter sets");
4403 RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
4406 void Scop::addRecordedAssumptions() {
4407 while (!RecordedAssumptions.empty()) {
4408 const Assumption &AS = RecordedAssumptions.pop_back_val();
4410 if (!AS.BB) {
4411 addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
4412 continue;
4415 // If the domain was deleted the assumptions are void.
4416 isl_set *Dom = getDomainConditions(AS.BB);
4417 if (!Dom) {
4418 isl_set_free(AS.Set);
4419 continue;
4422 // If a basic block was given use its domain to simplify the assumption.
4423 // In case of restrictions we know they only have to hold on the domain,
4424 // thus we can intersect them with the domain of the block. However, for
4425 // assumptions the domain has to imply them, thus:
4426 //
4427 //   Dom => S   <==>   not(Dom) v S   <==>   not(Dom - S)
4428 //
4429 // To avoid the complement we will register Dom - S as a restriction, not an
4430 // assumption.
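// Worked example (hypothetical): with Dom = { [i] : 0 <= i < n } and an
// assumption S = { [i] : i < 100 }, the registered restriction becomes
//   params(Dom - S) = { : n > 100 },
// i.e., exactly the parameter values for which the assumption would be
// violated by some execution.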
4431 isl_set *S = AS.Set;
4432 if (AS.Sign == AS_RESTRICTION)
4433 S = isl_set_params(isl_set_intersect(S, Dom));
4434 else /* (AS.Sign == AS_ASSUMPTION) */
4435 S = isl_set_params(isl_set_subtract(Dom, S));
4437 addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION, AS.BB);
4441 void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
4442 addAssumption(Kind, isl_set_empty(getParamSpace()), Loc, AS_ASSUMPTION, BB);
4445 __isl_give isl_set *Scop::getInvalidContext() const {
4446 return isl_set_copy(InvalidContext);
4449 void Scop::printContext(raw_ostream &OS) const {
4450 OS << "Context:\n";
4451 OS.indent(4) << Context << "\n";
4453 OS.indent(4) << "Assumed Context:\n";
4454 OS.indent(4) << AssumedContext << "\n";
4456 OS.indent(4) << "Invalid Context:\n";
4457 OS.indent(4) << InvalidContext << "\n";
4459 unsigned Dim = 0;
4460 for (const SCEV *Parameter : Parameters)
4461 OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4464 void Scop::printAliasAssumptions(raw_ostream &OS) const {
4465 int noOfGroups = 0;
4466 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4467 if (Pair.second.size() == 0)
4468 noOfGroups += 1;
4469 else
4470 noOfGroups += Pair.second.size();
4473 OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4474 if (MinMaxAliasGroups.empty()) {
4475 OS.indent(8) << "n/a\n";
4476 return;
4479 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4481 // If the group has no read only accesses print the write accesses.
4482 if (Pair.second.empty()) {
4483 OS.indent(8) << "[[";
4484 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4485 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4486 << ">";
4488 OS << " ]]\n";
4491 for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4492 OS.indent(8) << "[[";
4493 OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4494 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4495 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4496 << ">";
4498 OS << " ]]\n";
4503 void Scop::printStatements(raw_ostream &OS) const {
4504 OS << "Statements {\n";
4506 for (const ScopStmt &Stmt : *this)
4507 OS.indent(4) << Stmt;
4509 OS.indent(4) << "}\n";
4512 void Scop::printArrayInfo(raw_ostream &OS) const {
4513 OS << "Arrays {\n";
4515 for (auto &Array : arrays())
4516 Array->print(OS);
4518 OS.indent(4) << "}\n";
4520 OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4522 for (auto &Array : arrays())
4523 Array->print(OS, /* SizeAsPwAff */ true);
4525 OS.indent(4) << "}\n";
4528 void Scop::print(raw_ostream &OS) const {
4529 OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4530 OS.indent(4) << "Region: " << getNameStr() << "\n";
4531 OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4532 OS.indent(4) << "Invariant Accesses: {\n";
4533 for (const auto &IAClass : InvariantEquivClasses) {
4534 const auto &MAs = IAClass.InvariantAccesses;
4535 if (MAs.empty()) {
4536 OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4537 } else {
4538 MAs.front()->print(OS);
4539 OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4540 << "\n";
4543 OS.indent(4) << "}\n";
4544 printContext(OS.indent(4));
4545 printArrayInfo(OS.indent(4));
4546 printAliasAssumptions(OS);
4547 printStatements(OS.indent(4));
4550 void Scop::dump() const { print(dbgs()); }
4552 isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
4554 __isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
4555 bool NonNegative) {
4556 // First try to use the SCEVAffinator to generate a piecewise defined
4557 // affine function from @p E in the context of @p BB. If that task becomes too
4558 // complex the affinator might return a nullptr. In such a case we invalidate
4559 // the SCoP and return a dummy value. This way we do not need to add error
4560 // handling code to all users of this function.
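// In other words, callers always receive a usable piecewise affine
// expression: a too-complex input degrades to the constant zero below while
// the SCoP itself is invalidated via a COMPLEXITY assumption.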
4561 auto PWAC = Affinator.getPwAff(E, BB);
4562 if (PWAC.first) {
4563 // TODO: We could use a heuristic and either use:
4564 // SCEVAffinator::takeNonNegativeAssumption
4565 // or
4566 // SCEVAffinator::interpretAsUnsigned
4567 // to deal with unsigned or "NonNegative" SCEVs.
4568 if (NonNegative)
4569 Affinator.takeNonNegativeAssumption(PWAC);
4570 return PWAC;
4573 auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
4574 invalidate(COMPLEXITY, DL, BB);
4575 return Affinator.getPwAff(SE->getZero(E->getType()), BB);
4578 __isl_give isl_union_set *Scop::getDomains() const {
4579 isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
4580 isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4582 for (const ScopStmt &Stmt : *this)
4583 Domain = isl_union_set_add_set(Domain, Stmt.getDomain());
4585 return Domain;
4588 __isl_give isl_pw_aff *Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4589 PWACtx PWAC = getPwAff(E, BB);
4590 isl_set_free(PWAC.second);
4591 return PWAC.first;
4594 __isl_give isl_union_map *
4595 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4596 isl_union_map *Accesses = isl_union_map_empty(getParamSpace());
4598 for (ScopStmt &Stmt : *this) {
4599 for (MemoryAccess *MA : Stmt) {
4600 if (!Predicate(*MA))
4601 continue;
4603 isl_set *Domain = Stmt.getDomain();
4604 isl_map *AccessDomain = MA->getAccessRelation();
4605 AccessDomain = isl_map_intersect_domain(AccessDomain, Domain);
4606 Accesses = isl_union_map_add_map(Accesses, AccessDomain);
4609 return isl_union_map_coalesce(Accesses);
4612 __isl_give isl_union_map *Scop::getMustWrites() {
4613 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4616 __isl_give isl_union_map *Scop::getMayWrites() {
4617 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4620 __isl_give isl_union_map *Scop::getWrites() {
4621 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4624 __isl_give isl_union_map *Scop::getReads() {
4625 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4628 __isl_give isl_union_map *Scop::getAccesses() {
4629 return getAccessesOfType([](MemoryAccess &MA) { return true; });
4632 // Check whether @p Node is an extension node.
4634 // @return isl_bool_error if @p Node is an extension node; isl_bool_true
4635 // otherwise.
4635 isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
4636 if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
4637 return isl_bool_error;
4638 else
4639 return isl_bool_true;
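// Note that isl_schedule_foreach_schedule_node_top_down aborts the traversal
// as soon as the callback returns isl_bool_error and then reports
// isl_stat_error, which is exactly what containsExtensionNode checks for.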
4642 bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
4643 return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
4644 nullptr) == isl_stat_error;
4647 __isl_give isl_union_map *Scop::getSchedule() const {
4648 auto *Tree = getScheduleTree();
4649 if (containsExtensionNode(Tree)) {
4650 isl_schedule_free(Tree);
4651 return nullptr;
4653 auto *S = isl_schedule_get_map(Tree);
4654 isl_schedule_free(Tree);
4655 return S;
4658 __isl_give isl_schedule *Scop::getScheduleTree() const {
4659 return isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
4660 getDomains());
4663 void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
4664 auto *S = isl_schedule_from_domain(getDomains());
4665 S = isl_schedule_insert_partial_schedule(
4666 S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
4667 isl_schedule_free(Schedule);
4668 Schedule = S;
4671 void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
4672 isl_schedule_free(Schedule);
4673 Schedule = NewSchedule;
4676 bool Scop::restrictDomains(__isl_take isl_union_set *Domain) {
4677 bool Changed = false;
4678 for (ScopStmt &Stmt : *this) {
4679 isl_union_set *StmtDomain = isl_union_set_from_set(Stmt.getDomain());
4680 isl_union_set *NewStmtDomain = isl_union_set_intersect(
4681 isl_union_set_copy(StmtDomain), isl_union_set_copy(Domain));
4683 if (isl_union_set_is_subset(StmtDomain, NewStmtDomain)) {
4684 isl_union_set_free(StmtDomain);
4685 isl_union_set_free(NewStmtDomain);
4686 continue;
4689 Changed = true;
4691 isl_union_set_free(StmtDomain);
4692 NewStmtDomain = isl_union_set_coalesce(NewStmtDomain);
4694 if (isl_union_set_is_empty(NewStmtDomain)) {
4695 Stmt.restrictDomain(isl_set_empty(Stmt.getDomainSpace()));
4696 isl_union_set_free(NewStmtDomain);
4697 } else
4698 Stmt.restrictDomain(isl_set_from_union_set(NewStmtDomain));
4700 isl_union_set_free(Domain);
4701 return Changed;
4704 ScalarEvolution *Scop::getSE() const { return SE; }
4706 // Create an isl_multi_union_aff that defines an identity mapping from the
4707 // elements of USet to their N-th dimension.
4709 // # Example:
4711 // Domain: { A[i,j]; B[i,j,k] }
4712 // N: 1
4714 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4716 // @param USet A union set describing the elements for which to generate a
4717 // mapping.
4718 // @param N The dimension to map to.
4719 // @returns A mapping from USet to its N-th dimension.
4720 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
4721 assert(N >= 0);
4722 assert(USet);
4723 assert(!USet.is_empty());
4725 auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
4727 auto Lambda = [&Result, N](isl::set S) -> isl::stat {
4728 int Dim = S.dim(isl::dim::set);
4729 auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
4730 N, Dim - N);
4731 if (N > 1)
4732 PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
4734 Result = Result.add_pw_multi_aff(PMA);
4735 return isl::stat::ok;
4738 isl::stat Res = USet.foreach_set(Lambda);
4739 (void)Res;
4741 assert(Res == isl::stat::ok);
4743 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
4746 void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop,
4747 std::vector<Instruction *> Instructions) {
4748 assert(BB && "Unexpected nullptr!");
4749 Stmts.emplace_back(*this, *BB, SurroundingLoop, Instructions);
4750 auto *Stmt = &Stmts.back();
4751 StmtMap[BB].push_back(Stmt);
4754 void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
4755 assert(R && "Unexpected nullptr!");
4756 Stmts.emplace_back(*this, *R, SurroundingLoop);
4757 auto *Stmt = &Stmts.back();
4758 for (BasicBlock *BB : R->blocks())
4759 StmtMap[BB].push_back(Stmt);
4762 ScopStmt *Scop::addScopStmt(__isl_take isl_map *SourceRel,
4763 __isl_take isl_map *TargetRel,
4764 __isl_take isl_set *Domain) {
4765 #ifndef NDEBUG
4766 isl_set *SourceDomain = isl_map_domain(isl_map_copy(SourceRel));
4767 isl_set *TargetDomain = isl_map_domain(isl_map_copy(TargetRel));
4768 assert(isl_set_is_subset(Domain, TargetDomain) &&
4769 "Target access not defined for complete statement domain");
4770 assert(isl_set_is_subset(Domain, SourceDomain) &&
4771 "Source access not defined for complete statement domain");
4772 isl_set_free(SourceDomain);
4773 isl_set_free(TargetDomain);
4774 #endif
4775 Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4776 CopyStmtsNum++;
4777 return &(Stmts.back());
4780 void Scop::buildSchedule(LoopInfo &LI) {
4781 Loop *L = getLoopSurroundingScop(*this, LI);
4782 LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
4783 buildSchedule(getRegion().getNode(), LoopStack, LI);
4784 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
4785 Schedule = LoopStack[0].Schedule;
4788 /// To generate a schedule for the elements in a Region we traverse the Region
4789 /// in reverse-post-order and add the contained RegionNodes in traversal order
4790 /// to the schedule of the loop that is currently at the top of the LoopStack.
4791 /// For loop-free codes, this results in a correct sequential ordering.
4793 /// Example:
4794 /// bb1(0)
4795 /// / \.
4796 /// bb2(1) bb3(2)
4797 /// \ / \.
4798 /// bb4(3) bb5(4)
4799 /// \ /
4800 /// bb6(5)
4802 /// Including loops requires additional processing. Whenever a loop header is
4803 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4804 /// from an empty schedule, we first process all RegionNodes that are within
4805 /// this loop and complete the sequential schedule at this loop-level before
4806 /// processing any other nodes. To implement this
4807 /// loop-nodes-first-processing, the reverse post-order traversal is
4808 /// insufficient. Hence, we additionally check if the traversal yields
4809 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4810 /// These region nodes are then queued and only traversed after all the nodes
4811 /// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag that
  // indicates we queued the last sub-region/block taken from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/block.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}

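// Illustrative walk-through (assumed CFG): consider a region containing a
// single loop
//
//   entry -> header; header -> body, exit; body -> header
//
// If the reverse post-order happens to yield 'exit' before 'body', 'exit' is
// not part of the loop on top of the LoopStack, so it is parked on DelayList
// and 'body' is taken from WorkList first. Only after all of the loop's
// blocks have been processed, and its schedule finalized, is 'exit'
// scheduled.
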
void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {

  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack, LI);
      return;
    }
  }

  auto &LoopData = LoopStack.back();
  LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  if (auto *Stmt = getStmtFor(RN)) {
    auto *UDomain = isl_union_set_from_set(Stmt->getDomain());
    auto *StmtSchedule = isl_schedule_from_domain(UDomain);
    LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  while (LoopData.L &&
         LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
    auto *Schedule = LoopData.Schedule;
    auto NumBlocksProcessed = LoopData.NumBlocksProcessed;

    LoopStack.pop_back();
    auto &NextLoopData = LoopStack.back();

    if (Schedule) {
      isl::union_set Domain = give(isl_schedule_get_domain(Schedule));
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, LoopStack.size());
      Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA.release());
      NextLoopData.Schedule =
          combineInSequence(NextLoopData.Schedule, Schedule);
    }

    NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
    LoopData = NextLoopData;
  }
}

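// Sketch of the isl objects involved (assumed shapes): for a statement S with
// domain { S[i] : 0 <= i < n } inside a single loop, the statement first
// contributes the domain-only schedule built by isl_schedule_from_domain.
// When the loop is popped off the LoopStack, mapToDimension yields the
// partial schedule { S[i] -> [(i)] }, and
// isl_schedule_insert_partial_schedule turns it into a one-dimensional band
// that orders the loop's iterations.
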
ScopStmt *Scop::getStmtFor(BasicBlock *BB) const {
  auto StmtMapIt = StmtMap.find(BB);
  if (StmtMapIt == StmtMap.end())
    return nullptr;
  // Each basic block is currently assigned to at most one statement, so the
  // per-block list holds exactly one element here.
  assert(StmtMapIt->second.size() == 1);
  return StmtMapIt->second.front();
}

ScopStmt *Scop::getStmtFor(RegionNode *RN) const {
  if (RN->isSubRegion())
    return getStmtFor(RN->getNodeAs<Region>());
  return getStmtFor(RN->getNodeAs<BasicBlock>());
}

ScopStmt *Scop::getStmtFor(Region *R) const {
  ScopStmt *Stmt = getStmtFor(R->getEntry());
  assert(!Stmt || Stmt->getRegion() == R);
  return Stmt;
}

int Scop::getRelativeLoopDepth(const Loop *L) const {
  if (!L || !R.contains(L))
    return -1;
  // outermostLoopInRegion always returns nullptr for top-level regions.
  if (R.isTopLevelRegion()) {
    // LoopInfo's depths start at 1, ours start at 0.
    return L->getLoopDepth() - 1;
  } else {
    Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
    assert(OuterLoop);
    return L->getLoopDepth() - OuterLoop->getLoopDepth();
  }
}

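// Worked example (hypothetical loop nest): for loops i (LoopInfo depth 1),
// j (depth 2), and k (depth 3), a scop region whose outermost contained loop
// is j gives getRelativeLoopDepth(k) == 3 - 2 == 1, while for a top-level
// region the same query would return k's LoopInfo depth minus one, i.e. 2.
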
ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
  for (auto &SAI : arrays()) {
    if (SAI->getName() == BaseName)
      return SAI;
  }
  return nullptr;
}

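// Usage sketch (names are illustrative): Polly-created arrays commonly carry
// a "MemRef_" prefix, so a lookup might read
//
//   if (ScopArrayInfo *SAI = S.getArrayInfoByName("MemRef_A"))
//     /* use SAI */;
//
// A nullptr result simply means no array of that name exists in this scop.
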
//===----------------------------------------------------------------------===//
void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.setPreservesAll();
}

void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
  NumLoopsInScop += Stats.NumLoops;
  MaxNumLoopsInScop =
      std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);

  if (Stats.MaxDepth == 1)
    NumScopsDepthOne++;
  else if (Stats.MaxDepth == 2)
    NumScopsDepthTwo++;
  else if (Stats.MaxDepth == 3)
    NumScopsDepthThree++;
  else if (Stats.MaxDepth == 4)
    NumScopsDepthFour++;
  else if (Stats.MaxDepth == 5)
    NumScopsDepthFive++;
  else
    NumScopsDepthLarger++;
}

bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
  S = SB.getScop(); // take ownership of scop object

  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats);
  }

  return false;
}

void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)

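// Usage sketch (legacy pass manager; invocation as commonly documented for
// Polly, shown here for illustration):
//
//   opt -load LLVMPolly.so -polly-scops -analyze input.ll
//
// prints the polyhedral description of each detected scop via the print()
// method above.
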
//===----------------------------------------------------------------------===//
ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
                   LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
                   AssumptionCache &AC) {
  // Create a polyhedral description of the scop for every valid maximal
  // region of the function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
    std::unique_ptr<Scop> S = SB.getScop();
    if (!S)
      continue;
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}

AnalysisKey ScopInfoAnalysis::Key;

ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  auto &DL = F.getParent()->getDataLayout();
  return {DL, SD, SE, LI, AA, DT, AC};
}

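// Consumer sketch (new pass manager, illustrative): another function pass can
// obtain the per-region scops in the same way the printer pass below does:
//
//   auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
//   for (auto &It : SI)
//     if (Scop *S = It.second.get())
//       /* use S */;
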
PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
                                           FunctionAnalysisManager &FAM) {
  auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
  for (auto &It : SI) {
    if (It.second)
      It.second->print(Stream);
    else
      Stream << "Invalid Scop!\n";
  }
  return PreservedAnalyses::all();
}

void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.setPreservesAll();
}

bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC});
  return false;
}

void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : *Result) {
    if (It.second)
      It.second->print(OS);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)

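// Usage sketch (legacy pass manager, analogous to the region pass above;
// invocation shown for illustration only):
//
//   opt -load LLVMPolly.so -polly-function-scops -analyze input.ll
//
// runs scop construction once per function and prints all resulting scops.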