// Source: polly-mirror.git, lib/Analysis/ScopInfo.cpp
// (snapshot taken at the "Port SCEVAffinator to the isl c++ bindings" revision)
//===- ScopInfo.cpp -------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
106 using namespace llvm;
107 using namespace polly;
109 #define DEBUG_TYPE "polly-scops"
111 STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
112 STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
113 STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
114 STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
115 STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
116 STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
117 STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
118 STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
119 STATISTIC(AssumptionsInvariantLoad,
120 "Number of invariant loads assumptions taken.");
121 STATISTIC(AssumptionsDelinearization,
122 "Number of delinearization assumptions taken.");
124 STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
125 STATISTIC(NumLoopsInScop, "Number of loops in scops");
126 STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
127 STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");
129 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
130 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
131 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
132 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
133 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
134 STATISTIC(NumScopsDepthLarger,
135 "Number of scops with maximal loop depth 6 and larger");
136 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
138 STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
139 STATISTIC(
140 NumValueWritesInLoops,
141 "Number of scalar value writes nested in affine loops after ScopInfo");
142 STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
143 STATISTIC(NumPHIWritesInLoops,
144 "Number of scalar phi writes nested in affine loops after ScopInfo");
145 STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
146 STATISTIC(NumSingletonWritesInLoops,
147 "Number of singleton writes nested in affine loops after ScopInfo");
149 // The maximal number of basic sets we allow during domain construction to
150 // be created. More complex scops will result in very high compile time and
151 // are also unlikely to result in good code
152 static int const MaxDisjunctsInDomain = 20;
154 // The number of disjunct in the context after which we stop to add more
155 // disjuncts. This parameter is there to avoid exponential growth in the
156 // number of disjunct when adding non-convex sets to the context.
157 static int const MaxDisjunctsInContext = 4;
159 // The maximal number of dimensions we allow during invariant load construction.
160 // More complex access ranges will result in very high compile time and are also
161 // unlikely to result in good code. This value is very high and should only
162 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
163 static int const MaxDimensionsInAccessRange = 9;
165 static cl::opt<int>
166 OptComputeOut("polly-analysis-computeout",
167 cl::desc("Bound the scop analysis by a maximal amount of "
168 "computational steps (0 means no bound)"),
169 cl::Hidden, cl::init(800000), cl::ZeroOrMore,
170 cl::cat(PollyCategory));
172 static cl::opt<bool> PollyRemarksMinimal(
173 "polly-remarks-minimal",
174 cl::desc("Do not emit remarks about assumptions that are known"),
175 cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
177 static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
178 "polly-rtc-max-array-disjuncts",
179 cl::desc("The maximal number of disjunts allowed in memory accesses to "
180 "to build RTCs."),
181 cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
183 static cl::opt<unsigned> RunTimeChecksMaxParameters(
184 "polly-rtc-max-parameters",
185 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
186 cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
188 static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
189 "polly-rtc-max-arrays-per-group",
190 cl::desc("The maximal number of arrays to compare in each alias group."),
191 cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
193 static cl::opt<std::string> UserContextStr(
194 "polly-context", cl::value_desc("isl parameter set"),
195 cl::desc("Provide additional constraints on the context parameters"),
196 cl::init(""), cl::cat(PollyCategory));
198 static cl::opt<bool>
199 IslOnErrorAbort("polly-on-isl-error-abort",
200 cl::desc("Abort if an isl error is encountered"),
201 cl::init(true), cl::cat(PollyCategory));
203 static cl::opt<bool> PollyPreciseInbounds(
204 "polly-precise-inbounds",
205 cl::desc("Take more precise inbounds assumptions (do not scale well)"),
206 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
208 static cl::opt<bool>
209 PollyIgnoreInbounds("polly-ignore-inbounds",
210 cl::desc("Do not take inbounds assumptions at all"),
211 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
213 static cl::opt<bool> PollyIgnoreParamBounds(
214 "polly-ignore-parameter-bounds",
215 cl::desc(
216 "Do not add parameter bounds and do no gist simplify sets accordingly"),
217 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
219 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
220 "polly-allow-dereference-of-all-function-parameters",
221 cl::desc(
222 "Treat all parameters to functions that are pointers as dereferencible."
223 " This is useful for invariant load hoisting, since we can generate"
224 " less runtime checks. This is only valid if all pointers to functions"
225 " are always initialized, so that Polly can choose to hoist"
226 " their loads. "),
227 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
229 static cl::opt<bool> PollyPreciseFoldAccesses(
230 "polly-precise-fold-accesses",
231 cl::desc("Fold memory accesses to model more possible delinearizations "
232 "(does not scale well)"),
233 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
235 bool polly::UseInstructionNames;
237 static cl::opt<bool, true> XUseInstructionNames(
238 "polly-use-llvm-names",
239 cl::desc("Use LLVM-IR names when deriving statement names"),
240 cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
241 cl::ZeroOrMore, cl::cat(PollyCategory));
243 static cl::opt<bool> PollyPrintInstructions(
244 "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
245 cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));
247 //===----------------------------------------------------------------------===//
249 // Create a sequence of two schedules. Either argument may be null and is
250 // interpreted as the empty schedule. Can also return null if both schedules are
251 // empty.
252 static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
253 if (!Prev)
254 return Succ;
255 if (!Succ)
256 return Prev;
258 return Prev.sequence(Succ);
261 static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
262 int dim, isl::dim type) {
263 isl::val V;
264 isl::ctx Ctx = S.get_ctx();
266 // The upper and lower bound for a parameter value is derived either from
267 // the data type of the parameter or from the - possibly more restrictive -
268 // range metadata.
269 V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
270 S = S.lower_bound_val(type, dim, V);
271 V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
272 S = S.upper_bound_val(type, dim, V);
274 if (Range.isFullSet())
275 return S;
277 if (isl_set_n_basic_set(S.get()) > MaxDisjunctsInContext)
278 return S;
280 // In case of signed wrapping, we can refine the set of valid values by
281 // excluding the part not covered by the wrapping range.
282 if (Range.isSignWrappedSet()) {
283 V = valFromAPInt(Ctx.get(), Range.getLower(), true);
284 isl::set SLB = S.lower_bound_val(type, dim, V);
286 V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
287 V = V.sub_ui(1);
288 isl::set SUB = S.upper_bound_val(type, dim, V);
289 S = SLB.unite(SUB);
292 return S;
295 static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
296 LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
297 if (!BasePtrLI)
298 return nullptr;
300 if (!S->contains(BasePtrLI))
301 return nullptr;
303 ScalarEvolution &SE = *S->getSE();
305 auto *OriginBaseSCEV =
306 SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
307 if (!OriginBaseSCEV)
308 return nullptr;
310 auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
311 if (!OriginBaseSCEVUnknown)
312 return nullptr;
314 return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
315 MemoryKind::Array);
318 ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx,
319 ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
320 const DataLayout &DL, Scop *S,
321 const char *BaseName)
322 : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
323 std::string BasePtrName =
324 BaseName ? BaseName
325 : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
326 Kind == MemoryKind::PHI ? "__phi" : "",
327 UseInstructionNames);
328 Id = isl::id::alloc(Ctx, BasePtrName, this);
330 updateSizes(Sizes);
332 if (!BasePtr || Kind != MemoryKind::Array) {
333 BasePtrOriginSAI = nullptr;
334 return;
337 BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
338 if (BasePtrOriginSAI)
339 const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
342 ScopArrayInfo::~ScopArrayInfo() = default;
344 isl::space ScopArrayInfo::getSpace() const {
345 auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions());
346 Space = Space.set_tuple_id(isl::dim::set, Id);
347 return Space;
350 bool ScopArrayInfo::isReadOnly() {
351 isl::union_set WriteSet = S.getWrites().range();
352 isl::space Space = getSpace();
353 WriteSet = WriteSet.extract_set(Space);
355 return bool(WriteSet.is_empty());
358 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
359 if (Array->getElementType() != getElementType())
360 return false;
362 if (Array->getNumberOfDimensions() != getNumberOfDimensions())
363 return false;
365 for (unsigned i = 0; i < getNumberOfDimensions(); i++)
366 if (Array->getDimensionSize(i) != getDimensionSize(i))
367 return false;
369 return true;
372 void ScopArrayInfo::updateElementType(Type *NewElementType) {
373 if (NewElementType == ElementType)
374 return;
376 auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
377 auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);
379 if (NewElementSize == OldElementSize || NewElementSize == 0)
380 return;
382 if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
383 ElementType = NewElementType;
384 } else {
385 auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
386 ElementType = IntegerType::get(ElementType->getContext(), GCD);
390 /// Make the ScopArrayInfo model a Fortran Array
391 void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
392 assert(FAD && "got invalid Fortran array descriptor");
393 if (this->FAD) {
394 assert(this->FAD == FAD &&
395 "receiving different array descriptors for same array");
396 return;
399 assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
400 assert(!this->FAD);
401 this->FAD = FAD;
403 isl::space Space(S.getIslCtx(), 1, 0);
405 std::string param_name = getName();
406 param_name += "_fortranarr_size";
407 isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this);
409 Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
410 isl::pw_aff PwAff =
411 isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);
413 DimensionSizesPw[0] = PwAff;
416 bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
417 bool CheckConsistency) {
418 int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
419 int ExtraDimsNew = NewSizes.size() - SharedDims;
420 int ExtraDimsOld = DimensionSizes.size() - SharedDims;
422 if (CheckConsistency) {
423 for (int i = 0; i < SharedDims; i++) {
424 auto *NewSize = NewSizes[i + ExtraDimsNew];
425 auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
426 if (NewSize && KnownSize && NewSize != KnownSize)
427 return false;
430 if (DimensionSizes.size() >= NewSizes.size())
431 return true;
434 DimensionSizes.clear();
435 DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
436 NewSizes.end());
437 DimensionSizesPw.clear();
438 for (const SCEV *Expr : DimensionSizes) {
439 if (!Expr) {
440 DimensionSizesPw.push_back(nullptr);
441 continue;
443 isl::pw_aff Size = S.getPwAffOnly(Expr);
444 DimensionSizesPw.push_back(Size);
446 return true;
449 std::string ScopArrayInfo::getName() const { return Id.get_name(); }
451 int ScopArrayInfo::getElemSizeInBytes() const {
452 return DL.getTypeAllocSize(ElementType);
455 isl::id ScopArrayInfo::getBasePtrId() const { return Id; }
457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
458 LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); }
459 #endif
461 void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
462 OS.indent(8) << *getElementType() << " " << getName();
463 unsigned u = 0;
464 // If this is a Fortran array, then we can print the outermost dimension
465 // as a isl_pw_aff even though there is no SCEV information.
466 bool IsOutermostSizeKnown = SizeAsPwAff && FAD;
468 if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
469 !getDimensionSize(0)) {
470 OS << "[*]";
471 u++;
473 for (; u < getNumberOfDimensions(); u++) {
474 OS << "[";
476 if (SizeAsPwAff) {
477 isl::pw_aff Size = getDimensionSizePw(u);
478 OS << " " << Size << " ";
479 } else {
480 OS << *getDimensionSize(u);
483 OS << "]";
486 OS << ";";
488 if (BasePtrOriginSAI)
489 OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";
491 OS << " // Element size " << getElemSizeInBytes() << "\n";
494 const ScopArrayInfo *
495 ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
496 isl::id Id = PMA.get_tuple_id(isl::dim::out);
497 assert(!Id.is_null() && "Output dimension didn't have an ID");
498 return getFromId(Id);
501 const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
502 void *User = Id.get_user();
503 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
504 return SAI;
507 void MemoryAccess::wrapConstantDimensions() {
508 auto *SAI = getScopArrayInfo();
509 isl::space ArraySpace = SAI->getSpace();
510 isl::ctx Ctx = ArraySpace.get_ctx();
511 unsigned DimsArray = SAI->getNumberOfDimensions();
513 isl::multi_aff DivModAff = isl::multi_aff::identity(
514 ArraySpace.map_from_domain_and_range(ArraySpace));
515 isl::local_space LArraySpace = isl::local_space(ArraySpace);
517 // Begin with last dimension, to iteratively carry into higher dimensions.
518 for (int i = DimsArray - 1; i > 0; i--) {
519 auto *DimSize = SAI->getDimensionSize(i);
520 auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);
522 // This transformation is not applicable to dimensions with dynamic size.
523 if (!DimSizeCst)
524 continue;
526 // This transformation is not applicable to dimensions of size zero.
527 if (DimSize->isZero())
528 continue;
530 isl::val DimSizeVal =
531 valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
532 isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
533 isl::aff PrevVar =
534 isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);
536 // Compute: index % size
537 // Modulo must apply in the divide of the previous iteration, if any.
538 isl::aff Modulo = Var.mod(DimSizeVal);
539 Modulo = Modulo.pullback(DivModAff);
541 // Compute: floor(index / size)
542 isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
543 Divide = Divide.floor();
544 Divide = Divide.add(PrevVar);
545 Divide = Divide.pullback(DivModAff);
547 // Apply Modulo and Divide.
548 DivModAff = DivModAff.set_aff(i, Modulo);
549 DivModAff = DivModAff.set_aff(i - 1, Divide);
552 // Apply all modulo/divides on the accesses.
553 isl::map Relation = AccessRelation;
554 Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
555 Relation = Relation.detect_equalities();
556 AccessRelation = Relation;
559 void MemoryAccess::updateDimensionality() {
560 auto *SAI = getScopArrayInfo();
561 isl::space ArraySpace = SAI->getSpace();
562 isl::space AccessSpace = AccessRelation.get_space().range();
563 isl::ctx Ctx = ArraySpace.get_ctx();
565 auto DimsArray = ArraySpace.dim(isl::dim::set);
566 auto DimsAccess = AccessSpace.dim(isl::dim::set);
567 auto DimsMissing = DimsArray - DimsAccess;
569 auto *BB = getStatement()->getEntryBlock();
570 auto &DL = BB->getModule()->getDataLayout();
571 unsigned ArrayElemSize = SAI->getElemSizeInBytes();
572 unsigned ElemBytes = DL.getTypeAllocSize(getElementType());
574 isl::map Map = isl::map::from_domain_and_range(
575 isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));
577 for (unsigned i = 0; i < DimsMissing; i++)
578 Map = Map.fix_si(isl::dim::out, i, 0);
580 for (unsigned i = DimsMissing; i < DimsArray; i++)
581 Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);
583 AccessRelation = AccessRelation.apply_range(Map);
585 // For the non delinearized arrays, divide the access function of the last
586 // subscript by the size of the elements in the array.
588 // A stride one array access in C expressed as A[i] is expressed in
589 // LLVM-IR as something like A[i * elementsize]. This hides the fact that
590 // two subsequent values of 'i' index two values that are stored next to
591 // each other in memory. By this division we make this characteristic
592 // obvious again. If the base pointer was accessed with offsets not divisible
593 // by the accesses element size, we will have chosen a smaller ArrayElemSize
594 // that divides the offsets of all accesses to this base pointer.
595 if (DimsAccess == 1) {
596 isl::val V = isl::val(Ctx, ArrayElemSize);
597 AccessRelation = AccessRelation.floordiv_val(V);
600 // We currently do this only if we added at least one dimension, which means
601 // some dimension's indices have not been specified, an indicator that some
602 // index values have been added together.
603 // TODO: Investigate general usefulness; Effect on unit tests is to make index
604 // expressions more complicated.
605 if (DimsMissing)
606 wrapConstantDimensions();
608 if (!isAffine())
609 computeBoundsOnAccessRelation(ArrayElemSize);
611 // Introduce multi-element accesses in case the type loaded by this memory
612 // access is larger than the canonical element type of the array.
614 // An access ((float *)A)[i] to an array char *A is modeled as
615 // {[i] -> A[o] : 4 i <= o <= 4 i + 3
616 if (ElemBytes > ArrayElemSize) {
617 assert(ElemBytes % ArrayElemSize == 0 &&
618 "Loaded element size should be multiple of canonical element size");
619 isl::map Map = isl::map::from_domain_and_range(
620 isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
621 for (unsigned i = 0; i < DimsArray - 1; i++)
622 Map = Map.equate(isl::dim::in, i, isl::dim::out, i);
624 isl::constraint C;
625 isl::local_space LS;
627 LS = isl::local_space(Map.get_space());
628 int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();
630 C = isl::constraint::alloc_inequality(LS);
631 C = C.set_constant_val(isl::val(Ctx, Num - 1));
632 C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
633 C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
634 Map = Map.add_constraint(C);
636 C = isl::constraint::alloc_inequality(LS);
637 C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
638 C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
639 C = C.set_constant_val(isl::val(Ctx, 0));
640 Map = Map.add_constraint(C);
641 AccessRelation = AccessRelation.apply_range(Map);
645 const std::string
646 MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
647 switch (RT) {
648 case MemoryAccess::RT_NONE:
649 llvm_unreachable("Requested a reduction operator string for a memory "
650 "access which isn't a reduction");
651 case MemoryAccess::RT_ADD:
652 return "+";
653 case MemoryAccess::RT_MUL:
654 return "*";
655 case MemoryAccess::RT_BOR:
656 return "|";
657 case MemoryAccess::RT_BXOR:
658 return "^";
659 case MemoryAccess::RT_BAND:
660 return "&";
662 llvm_unreachable("Unknown reduction type");
665 const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
666 isl::id ArrayId = getArrayId();
667 void *User = ArrayId.get_user();
668 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
669 return SAI;
672 const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
673 isl::id ArrayId = getLatestArrayId();
674 void *User = ArrayId.get_user();
675 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
676 return SAI;
679 isl::id MemoryAccess::getOriginalArrayId() const {
680 return AccessRelation.get_tuple_id(isl::dim::out);
683 isl::id MemoryAccess::getLatestArrayId() const {
684 if (!hasNewAccessRelation())
685 return getOriginalArrayId();
686 return NewAccessRelation.get_tuple_id(isl::dim::out);
689 isl::map MemoryAccess::getAddressFunction() const {
690 return getAccessRelation().lexmin();
693 isl::pw_multi_aff
694 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
695 isl::map Schedule, ScheduledAccRel;
696 isl::union_set UDomain;
698 UDomain = getStatement()->getDomain();
699 USchedule = USchedule.intersect_domain(UDomain);
700 Schedule = isl::map::from_union_map(USchedule);
701 ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
702 return isl::pw_multi_aff::from_map(ScheduledAccRel);
705 isl::map MemoryAccess::getOriginalAccessRelation() const {
706 return AccessRelation;
709 std::string MemoryAccess::getOriginalAccessRelationStr() const {
710 return AccessRelation.to_str();
713 isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
714 return AccessRelation.get_space();
717 isl::map MemoryAccess::getNewAccessRelation() const {
718 return NewAccessRelation;
721 std::string MemoryAccess::getNewAccessRelationStr() const {
722 return NewAccessRelation.to_str();
725 std::string MemoryAccess::getAccessRelationStr() const {
726 return getAccessRelation().to_str();
729 isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
730 isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
731 Space = Space.align_params(Statement->getDomainSpace());
733 return isl::basic_map::from_domain_and_range(
734 isl::basic_set::universe(Statement->getDomainSpace()),
735 isl::basic_set::universe(Space));
738 // Formalize no out-of-bound access assumption
740 // When delinearizing array accesses we optimistically assume that the
741 // delinearized accesses do not access out of bound locations (the subscript
742 // expression of each array evaluates for each statement instance that is
743 // executed to a value that is larger than zero and strictly smaller than the
744 // size of the corresponding dimension). The only exception is the outermost
745 // dimension for which we do not need to assume any upper bound. At this point
746 // we formalize this assumption to ensure that at code generation time the
747 // relevant run-time checks can be generated.
749 // To find the set of constraints necessary to avoid out of bound accesses, we
750 // first build the set of data locations that are not within array bounds. We
751 // then apply the reverse access relation to obtain the set of iterations that
752 // may contain invalid accesses and reduce this set of iterations to the ones
753 // that are actually executed by intersecting them with the domain of the
754 // statement. If we now project out all loop dimensions, we obtain a set of
755 // parameters that may cause statement instances to be executed that may
756 // possibly yield out of bound memory accesses. The complement of these
757 // constraints is the set of constraints that needs to be assumed to ensure such
758 // statement instances are never executed.
759 void MemoryAccess::assumeNoOutOfBound() {
760 if (PollyIgnoreInbounds)
761 return;
762 auto *SAI = getScopArrayInfo();
763 isl::space Space = getOriginalAccessRelationSpace().range();
764 isl::set Outside = isl::set::empty(Space);
765 for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
766 isl::local_space LS(Space);
767 isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
768 isl::pw_aff Zero = isl::pw_aff(LS);
770 isl::set DimOutside = Var.lt_set(Zero);
771 isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
772 SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
773 SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
774 DimOutside = DimOutside.unite(SizeE.le_set(Var));
776 Outside = Outside.unite(DimOutside);
779 Outside = Outside.apply(getAccessRelation().reverse());
780 Outside = Outside.intersect(Statement->getDomain());
781 Outside = Outside.params();
783 // Remove divs to avoid the construction of overly complicated assumptions.
784 // Doing so increases the set of parameter combinations that are assumed to
785 // not appear. This is always save, but may make the resulting run-time check
786 // bail out more often than strictly necessary.
787 Outside = Outside.remove_divs();
788 Outside = Outside.complement();
789 const auto &Loc = getAccessInstruction()
790 ? getAccessInstruction()->getDebugLoc()
791 : DebugLoc();
792 if (!PollyPreciseInbounds)
793 Outside = Outside.gist_params(Statement->getDomain().params());
794 Statement->getParent()->recordAssumption(INBOUNDS, Outside, Loc,
795 AS_ASSUMPTION);
798 void MemoryAccess::buildMemIntrinsicAccessRelation() {
799 assert(isMemoryIntrinsic());
800 assert(Subscripts.size() == 2 && Sizes.size() == 1);
802 isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
803 isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);
805 isl::map LengthMap;
806 if (Subscripts[1] == nullptr) {
807 LengthMap = isl::map::universe(SubscriptMap.get_space());
808 } else {
809 isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
810 LengthMap = isl::map::from_pw_aff(LengthPWA);
811 isl::space RangeSpace = LengthMap.get_space().range();
812 LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
814 LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
815 LengthMap = LengthMap.align_params(SubscriptMap.get_space());
816 SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
817 LengthMap = LengthMap.sum(SubscriptMap);
818 AccessRelation =
819 LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
822 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
823 ScalarEvolution *SE = Statement->getParent()->getSE();
825 auto MAI = MemAccInst(getAccessInstruction());
826 if (isa<MemIntrinsic>(MAI))
827 return;
829 Value *Ptr = MAI.getPointerOperand();
830 if (!Ptr || !SE->isSCEVable(Ptr->getType()))
831 return;
833 auto *PtrSCEV = SE->getSCEV(Ptr);
834 if (isa<SCEVCouldNotCompute>(PtrSCEV))
835 return;
837 auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
838 if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
839 PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);
841 const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
842 if (Range.isFullSet())
843 return;
845 if (Range.isWrappedSet() || Range.isSignWrappedSet())
846 return;
848 bool isWrapping = Range.isSignWrappedSet();
850 unsigned BW = Range.getBitWidth();
851 const auto One = APInt(BW, 1);
852 const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
853 const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();
855 auto Min = LB.sdiv(APInt(BW, ElementSize));
856 auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
858 assert(Min.sle(Max) && "Minimum expected to be less or equal than max");
860 isl::map Relation = AccessRelation;
861 isl::set AccessRange = Relation.range();
862 AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
863 isl::dim::set);
864 AccessRelation = Relation.intersect_range(AccessRange);
/// Fold the access relation of a multi-dimensional access with parametric
/// dimension sizes: whenever subscript i+1 over- or underflows its dimension
/// size, carry the excess into dimension i. Only applies when at least the
/// second dimension size is non-constant.
void MemoryAccess::foldAccessRelation() {
  // Nothing to fold for single-dimensional or constant-sized arrays.
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  // Fold from the innermost dimension pair outwards.
  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    // The (parametric) size of dimension i+1; it becomes an isl parameter.
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    isl::id ParamId =
        give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    // MapOne: identity for accesses whose dimension i+1 is non-negative
    // (no folding required).
    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    // MapTwo: for negative dimension i+1, borrow one from dimension i and
    // add the dimension size to dimension i+1.
    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    // out[i] = in[i] - 1
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    // out[i+1] = in[i+1] + DimSize (DimSize is the isl parameter).
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  // Re-attach the tuple ids lost by the transformations above.
  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      isl_map_n_basic_map(NewAccessRelation.get()) >
          isl_map_n_basic_map(AccessRelation.get())) {
    // Keep the unfolded relation; folding made it more complex.
  } else {
    AccessRelation = NewAccessRelation;
  }
}
936 /// Check if @p Expr is divisible by @p Size.
937 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
938 assert(Size != 0);
939 if (Size == 1)
940 return true;
942 // Only one factor needs to be divisible.
943 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
944 for (auto *FactorExpr : MulExpr->operands())
945 if (isDivisible(FactorExpr, Size, SE))
946 return true;
947 return false;
950 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
951 // to be divisible.
952 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
953 for (auto *OpExpr : NAryExpr->operands())
954 if (!isDivisible(OpExpr, Size, SE))
955 return false;
956 return true;
959 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
960 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
961 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
962 return MulSCEV == Expr;
/// Build the isl access relation Stmt[iters] -> Array[subscripts] for this
/// access into the array described by @p SAI.
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  // Memory intrinsics (memset/memcpy/memmove) access a length, handled by a
  // dedicated builder.
  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference, if an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  // Affine case: translate each subscript SCEV into a piecewise affine
  // expression and concatenate them into one relation.
  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  // Attach the statement and array tuple ids and simplify wrt. the domain.
  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}
/// Construct a MemoryAccess from an access instruction with explicit
/// subscripts/sizes. The isl relations are built later (buildAccessRelation);
/// only bookkeeping and the access id happen here.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  // Indexed by AccessType; the current access count makes the id unique
  // within the statement.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
/// Construct a MemoryAccess directly from an explicit access relation
/// (used for copy statements). Array metadata is recovered from the
/// ScopArrayInfo referenced by the relation's output tuple id.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  // Dimension 0 has no size by convention; copy the remaining sizes.
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  // Indexed by AccessType; the current access count makes the id unique
  // within the statement.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
1048 MemoryAccess::~MemoryAccess() = default;
1050 void MemoryAccess::realignParams() {
1051 isl::set Ctx = Statement->getParent()->getContext();
1052 InvalidDomain = InvalidDomain.gist_params(Ctx);
1053 AccessRelation = AccessRelation.gist_params(Ctx);
/// Return the printable name of this access's reduction type.
const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}
1060 isl::id MemoryAccess::getId() const { return Id; }
1062 raw_ostream &polly::operator<<(raw_ostream &OS,
1063 MemoryAccess::ReductionType RT) {
1064 if (RT == MemoryAccess::RT_NONE)
1065 OS << "NONE";
1066 else
1067 OS << MemoryAccess::getReductionOperatorStr(RT);
1068 return OS;
1071 void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
/// Print a human-readable description of this access: kind header, reduction
/// type, optional Fortran descriptor, scalar flag and the access relation(s).
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  // Only print the new access relation when one has been set explicitly.
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger convenience: print to stderr.
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif
/// Translate the SCEV @p E into a piecewise affine expression relative to
/// this access's statement, accumulating the iterations for which the
/// translation is invalid into InvalidDomain.
isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  // Drop the tuple id so the domain matches the (unnamed) PWA domain.
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(PWAC.second);
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return PWAC.first;
}
// Create a map in the size of the provided set domain, that maps from the
// one element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strict smaller
// than the last dimension of the output.
//
//   getEqualAndLarger(set[i0, i1, ..., iX]):
//
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strict smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
  return Map;
}
/// Compute the set of differences (deltas) between the array elements
/// accessed at consecutive schedule points; used to derive stride info.
isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  // NextScatt relates each schedule point to its immediate successor.
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  NextScatt = NextScatt.lexmin();

  // Translate schedule points back to iterations and then to array elements
  // on both sides of the relation.
  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}
1162 bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
1163 isl::set Stride, StrideX;
1164 bool IsStrideX;
1166 Stride = getStride(Schedule);
1167 StrideX = isl::set::universe(Stride.get_space());
1168 for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
1169 StrideX = StrideX.fix_si(isl::dim::set, i, 0);
1170 StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
1171 StrideWidth);
1172 IsStrideX = Stride.is_subset(StrideX);
1174 return IsStrideX;
/// True if the same element is accessed in consecutive schedule points.
bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}
/// True if consecutive schedule points access consecutive array elements.
bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}
// Overwrite the original access relation (no validation, unlike
// setNewAccessRelation).
void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}
/// Install @p NewAccess as the (JSCoP/optimizer-provided) new access
/// relation, after validating it against the statement domain and the
/// accessed array in debug builds.
void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain =
        StmtDomain.intersect_params(getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  // The output tuple id must identify the accessed ScopArrayInfo.
  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to number of dimensions of the
  // accesses array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  // Simplify wrt. the statement domain before storing.
  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}
/// True if the latest access relation does not cover the whole statement
/// domain, i.e. some statement instances do not perform this access.
bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  // NOTE(review): intentionally uses the raw C API — comparing against
  // isl_bool_false means an isl error (isl_bool_error) yields "not partial",
  // which a naive !is_subset() rewrite would not preserve.
  return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
}
1245 //===----------------------------------------------------------------------===//
1247 isl::map ScopStmt::getSchedule() const {
1248 isl::set Domain = getDomain();
1249 if (Domain.is_empty())
1250 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1251 auto Schedule = getParent()->getSchedule();
1252 if (!Schedule)
1253 return nullptr;
1254 Schedule = Schedule.intersect_domain(isl::union_set(Domain));
1255 if (Schedule.is_empty())
1256 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1257 isl::map M = M.from_union_map(Schedule);
1258 M = M.coalesce();
1259 M = M.gist_domain(Domain);
1260 M = M.coalesce();
1261 return M;
/// Replace the statement domain by @p NewDomain, which must only remove
/// instances (subset of the old domain).
void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}
/// Register @p Access with this statement and index it in the lookup table
/// that matches its kind (array / scalar value / PHI). @p Prepend inserts it
/// at the front of the access list instead of the back.
void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    // Several array accesses may originate from one instruction.
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    // At most one value write per defining instruction.
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    // At most one value read per used value.
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}
1305 void ScopStmt::realignParams() {
1306 for (MemoryAccess *MA : *this)
1307 MA->realignParams();
1309 isl::set Ctx = Parent.getContext();
1310 InvalidDomain = InvalidDomain.gist_params(Ctx);
1311 Domain = Domain.gist_params(Ctx);
/// Add @p BSet to the set @p User if @p BSet is bounded.
/// isl_set_foreach_basic_set callback; @p User is an isl_set** accumulator.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet); // Ownership was taken; free unbounded parts.
  return isl_stat_ok;
}
/// Return the bounded parts of @p S.
/// Consumes @p S and unions all of its bounded basic sets.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
  // All dimensions are non-negative (iteration counts start at 0).
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  for (unsigned u = 0; u < Dim; u++) {
    // Constraint: param[u] - set[u] >= 0, i.e. set[u] <= param[u].
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}
/// Set the dimension Ids from @p From in @p To.
/// Both sets must have the same number of set dimensions.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Unsigned predicates intentionally map onto the signed isl set operations:
/// callers translate unsigned comparisons under a non-negativity assumption
/// (see the NonNeg handling in the buildConditionSets overloads below).
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return isl_pw_aff_eq_set(L, R);
  case ICmpInst::ICMP_NE:
    return isl_pw_aff_ne_set(L, R);
  case ICmpInst::ICMP_SLT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_SLE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_SGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_SGE:
    return isl_pw_aff_ge_set(L, R);
  case ICmpInst::ICMP_ULT:
    return isl_pw_aff_lt_set(L, R);
  case ICmpInst::ICMP_UGT:
    return isl_pw_aff_gt_set(L, R);
  case ICmpInst::ICMP_ULE:
    return isl_pw_aff_le_set(L, R);
  case ICmpInst::ICMP_UGE:
    return isl_pw_aff_ge_set(L, R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Helper function that will make sure the dimensions of the result have the
/// same isl_id's as the @p Domain.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R,
                                             __isl_keep isl_set *Domain) {
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
  return setDimensionIds(Domain, ConsequenceCondSet);
}
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  // Record iterations on which the translation is invalid for this block.
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.take();
}
/// Build the conditions sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors. Index 0 is the default
/// destination and receives the domain minus all case conditions.
bool buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                        __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    // Condition for this case: switch value == case value, within Domain.
    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  // The default destination's set is the domain minus all case sets.
  // NOTE(review): this reads ConditionSets[1] unconditionally, which assumes
  // the switch has at least one case — presumably guaranteed by earlier SCoP
  // detection; verify against callers.
  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = setDimensionIds(
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));

  isl_pw_aff_free(LHS);

  return true;
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  // Both halves must hold; re-attach the domain's dimension ids.
  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  ConsequenceCondSet = setDimensionIds(Domain, ConsequenceCondSet);
  return ConsequenceCondSet;
}
/// Build the conditions sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
bool buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                        TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  ScalarEvolution &SE = *S.getSE();
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    // A load used as an i1 condition: model "condition is false" as
    // "loaded value <= 0" (an i1 load is 0 or 1).
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(S, BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(S, BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet =
        buildConditionSet(ICmpInst::ICMP_SLE, LHS, RHS, Domain);
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    // PHIs are only valid conditions if all non-error incoming values agree.
    // NOTE(review): the dyn_cast result is dereferenced without a null check;
    // presumably getUniqueNonErrorValue() is guaranteed to return a
    // ConstantInt for PHIs reaching this point — verify against SCoP
    // detection.
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &S.getRegion(), *S.getLI(), *S.getDT()));

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    // Constant conditions yield the empty or the full domain.
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    // Recurse into both operands; each pushes a consequence and an
    // alternative set onto ConditionSets.
    bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    // Pop the two (consequence, alternative) pairs; only the consequence
    // sets are combined, the alternatives are freed.
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    LoopInfo &LI = *S.getLI();
    DominatorTree &DT = *S.getDT();
    Region &R = S.getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      // a >u b  <=>  b <u a — swap the operands.
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet =
          buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  // Bail out (and invalidate the SCoP) when either branch's condition set
  // would have too many disjuncts to handle efficiently.
  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                 TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors.
bool buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
                        __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  // Switches get dedicated handling.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  // Unconditional branches transfer the whole domain.
  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
/// Construct a region statement covering @p R; @p EntryBlockInstructions are
/// the instructions of the region's entry block that belong to this
/// statement.
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop,
                   std::vector<Instruction *> EntryBlockInstructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), SurroundingLoop(SurroundingLoop),
      Instructions(EntryBlockInstructions) {
  BaseName = getIslCompatibleName(
      "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
}
/// Create a statement for the single basic block @p bb. @p Count
/// disambiguates multiple statements created for the same block (a non-zero
/// count is appended to the name).
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
                   std::vector<Instruction *> Instructions, int Count)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      Build(nullptr), SurroundingLoop(SurroundingLoop),
      Instructions(Instructions) {
  std::string S = "";
  if (Count != 0)
    S += std::to_string(Count);
  BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), S,
                                  UseInstructionNames);
}
/// Create a copy statement with an explicit domain and a pair of access
/// relations: a MUST_WRITE described by @p TargetRel and a READ described by
/// @p SourceRel. Both relations and the domain are re-tagged with a fresh
/// statement id so they refer to this statement.
ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  // The MemoryAccess objects are owned by the parent Scop (registered via
  // addAccessFunction); this statement only references them.
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}
ScopStmt::~ScopStmt() = default;

/// Return the textual (isl) representation of this statement's domain.
std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }
1742 std::string ScopStmt::getScheduleStr() const {
1743 auto *S = getSchedule().release();
1744 if (!S)
1745 return {};
1746 auto Str = stringFromIslObj(S);
1747 isl_map_free(S);
1748 return Str;
/// Record the set of parameter values for which this statement is invalid.
void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }

/// Return the single basic block of a block statement, or the entry block of
/// a region statement.
BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

/// Return the loop that induces schedule dimension @p Dimension.
Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl::ctx ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

isl::set ScopStmt::getDomain() const { return Domain; }

isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }

isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
/// Print the list of instructions attached to this statement, one per line,
/// indented to match the surrounding ScopStmt::print output.
void ScopStmt::printInstructions(raw_ostream &OS) const {
  OS << "Instructions {\n";

  for (Instruction *Inst : Instructions)
    OS.indent(16) << *Inst << "\n";

  OS.indent(12) << "}\n";
}
/// Print this statement: name, domain, schedule, all memory accesses and —
/// if @p PrintInstructions is set — the attached instructions.
void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  // The schedule is only meaningful when a domain exists, hence the same
  // guard is used for both sections.
  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);

  if (PrintInstructions)
    printInstructions(OS.indent(12));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only dump to stderr, including the instruction list.
LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
#endif
/// Drop @p MA from the per-statement lookup tables (ValueReads, ValueWrites,
/// PHIWrites, PHIReads) that index original value/PHI accesses. Asserts that
/// the entry was actually present.
void ScopStmt::removeAccessData(MemoryAccess *MA) {
  if (MA->isRead() && MA->isOriginalValueKind()) {
    bool Found = ValueReads.erase(MA->getAccessValue());
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalValueKind()) {
    bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
}
1834 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
1835 // Remove the memory accesses from this statement together with all scalar
1836 // accesses that were caused by it. MemoryKind::Value READs have no access
1837 // instruction, hence would not be removed by this function. However, it is
1838 // only used for invariant LoadInst accesses, its arguments are always affine,
1839 // hence synthesizable, and therefore there are no MemoryKind::Value READ
1840 // accesses to be removed.
1841 auto Predicate = [&](MemoryAccess *Acc) {
1842 return Acc->getAccessInstruction() == MA->getAccessInstruction();
1844 for (auto *MA : MemAccs) {
1845 if (Predicate(MA)) {
1846 removeAccessData(MA);
1847 Parent.removeAccessData(MA);
1850 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
1851 MemAccs.end());
1852 InstructionToAccess.erase(MA->getAccessInstruction());
/// Remove exactly the access @p MA from this statement (unlike
/// removeMemoryAccess, related accesses to the same instruction are kept).
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
  assert(MAIt != MemAccs.end());
  MemAccs.erase(MAIt);

  removeAccessData(MA);
  Parent.removeAccessData(MA);

  // Only drop the instruction-to-access entry once no access for that
  // instruction remains.
  auto It = InstructionToAccess.find(MA->getAccessInstruction());
  if (It != InstructionToAccess.end()) {
    It->second.remove(MA);
    if (It->second.empty())
      InstructionToAccess.erase(MA->getAccessInstruction());
  }
}
/// Return the input access of @p V for this statement, creating a
/// MemoryKind::Value READ access (and registering it with the parent Scop)
/// if none exists yet.
MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
  MemoryAccess *Access = lookupInputAccessOf(V);
  if (Access)
    return Access;

  ScopArrayInfo *SAI =
      Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
  // Ownership of the new access is transferred to the parent Scop via
  // addAccessFunction.
  Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
                            true, {}, {}, V, MemoryKind::Value);
  Parent.addAccessFunction(Access);
  Access->buildAccessRelation(SAI);
  addAccess(Access);
  Parent.addAccessData(Access);
  return Access;
}
/// Stream a statement; whether instructions are included is controlled by the
/// -polly-print-instructions option.
raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
  S.print(OS, PollyPrintInstructions);
  return OS;
}
//===----------------------------------------------------------------------===//
/// Scop class implementation
/// Replace the context, aligning the new context's parameters to the ones
/// already present in the old context.
void Scop::setContext(isl::set NewContext) {
  Context = NewContext.align_params(Context.get_space());
}
1899 namespace {
/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  const ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
                                 ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  /// Rewrite @p E, replacing SCEVUnknown values through @p VMap.
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             const ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  /// Split the AddRec into start + {0,+,step} so the (possibly remapped)
  /// start expression does not need a dominance relation with the loop.
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  /// Replace the value with its mapping, if one exists.
  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};
/// Check whether we should remap a SCEV expression.
///
/// Reports (via hasVariant) whether @p E references an AddRec over a loop
/// inside the scop, or an unmapped instruction inside the scop's region —
/// i.e. whether rewriting through @p VMap would be unsound.
struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
  const ValueToValueMap &VMap;
  bool FoundInside = false;
  const Scop *S;

public:
  SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
                     const Scop *S)
      : SCEVTraversal(*this), VMap(VMap), S(S) {}

  static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
                         const ValueToValueMap &VMap, const Scop *S) {
    SCEVFindInsideScop SFIS(VMap, SE, S);
    SFIS.visitAll(E);
    return SFIS.FoundInside;
  }

  bool follow(const SCEV *E) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
      FoundInside |= S->getRegion().contains(AddRec->getLoop());
    } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
      if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
        FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
    }
    // Stop descending once something inside the scop was found.
    return !FoundInside;
  }

  bool isDone() { return FoundInside; }
};
1963 } // end anonymous namespace
/// Normalize @p E so equivalent invariant loads are represented by a single
/// SCEV, unless the rewrite would reference values defined inside the scop.
const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
  // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
  // doesn't like addition between an AddRec and an expression that
  // doesn't have a dominance relationship with it.)
  if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
    return E;

  // Rewrite SCEV.
  return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}
1976 // This table of function names is used to translate parameter names in more
1977 // human-readable names. This makes it easier to interpret Polly analysis
1978 // results.
1979 StringMap<std::string> KnownNames = {
1980 {"_Z13get_global_idj", "global_id"},
1981 {"_Z12get_local_idj", "local_id"},
1982 {"_Z15get_global_sizej", "global_size"},
1983 {"_Z14get_local_sizej", "local_size"},
1984 {"_Z12get_work_dimv", "work_dim"},
1985 {"_Z17get_global_offsetj", "global_offset"},
1986 {"_Z12get_group_idj", "group_id"},
1987 {"_Z14get_num_groupsj", "num_groups"},
1990 static std::string getCallParamName(CallInst *Call) {
1991 std::string Result;
1992 raw_string_ostream OS(Result);
1993 std::string Name = Call->getCalledFunction()->getName();
1995 auto Iterator = KnownNames.find(Name);
1996 if (Iterator != KnownNames.end())
1997 Name = "__" + Iterator->getValue();
1998 OS << Name;
1999 for (auto &Operand : Call->arg_operands()) {
2000 ConstantInt *Op = cast<ConstantInt>(&Operand);
2001 OS << "_" << Op->getValue();
2003 OS.flush();
2004 return Result;
/// Create and record the isl id for @p Parameter. The id's user pointer is
/// the SCEV itself, its name is either derived from a known call, the value's
/// own name, or a generic "p_<n>" fallback.
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();
    CallInst *Call = dyn_cast<CallInst>(Val);

    if (Call && isConstCall(Call)) {
      ParameterName = getCallParamName(Call);
    } else if (UseInstructionNames) {
      // If this parameter references a specific Value and this value has a name
      // we use this name as it is likely to be unique and more useful than just
      // a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }

    // Sanitize the name so isl accepts it.
    ParameterName = getIslCompatibleName("", ParameterName, "");
  }

  isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
                              const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}
/// Register @p NewParameters with this Scop, creating an isl id for each
/// parameter not seen before.
void Scop::addParams(const ParameterSetTy &NewParameters) {
  for (const SCEV *Parameter : NewParameters) {
    // Normalize the SCEV to get the representing element for an invariant load.
    Parameter = extractConstantFactor(Parameter, *SE).second;
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);

    if (Parameters.insert(Parameter))
      createParameterId(Parameter);
  }
}
/// Return the isl id registered for @p Parameter (null id if unknown).
isl::id Scop::getIdForParam(const SCEV *Parameter) const {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return ParameterIds.lookup(Parameter);
}
/// Restrict @p C to the parameter values for which at least one statement
/// instance is executed (the parameter projection of all domains).
isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains().release());
  return isl::manage(isl_set_intersect_params(C.release(), DomainContext));
}
/// Return true if @p BB dominates the scop's entry block according to @p DT.
bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}
/// Incorporate llvm.assume calls into the Scop's context.
///
/// Only single-argument assume calls that are either inside the scop or
/// dominate its entry are considered, and only if their condition is affine.
/// The derived constraint is intersected into the Context; parameters that
/// only occur in the assumption are projected out again.
void Scop::addUserAssumptions(
    AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    bool InScop = contains(CI);
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
    auto *Dom = InScop ? DomainMap[BB].copy() : Context.copy();
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
                                    InvalidDomainMap, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    // ConditionSets[0] holds the set where the condition holds,
    // ConditionSets[1] the alternative (see buildConditionSets).
    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        // u-- so the dimension now shifted into position u is re-examined.
        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    Context = Context.intersect(isl::manage(AssumptionCtx));
  }
}
/// Intersect the context with the user-provided -polly-context set.
///
/// The user context is rejected (with a diagnostic to errs()) if its number
/// of parameter dimensions or any parameter name differs from the computed
/// context; otherwise its dimensions are re-tagged with the computed ids and
/// intersected into Context.
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx().get(), UserContextStr.c_str());
  isl_space *Space = getParamSpace().release();
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    free(SpaceStr);
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    std::string NameContext = Context.get_dim_name(isl::dim::param, i);
    // NOTE(review): isl_set_get_dim_name may return null for unnamed
    // dimensions, which would be UB when constructing std::string — confirm
    // user contexts are always fully named here.
    std::string NameUserContext =
        isl_set_get_dim_name(UserContext, isl_dim_param, i);

    if (NameContext != NameUserContext) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    // Reuse the computed context's id so the dimensions actually match.
    UserContext =
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
                           isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = Context.intersect(isl::manage(UserContext));
  isl_space_free(Space);
}
/// Group required invariant loads by (pointer SCEV, loaded type). The first
/// load of each class becomes the class representative; later loads are
/// mapped to it via InvEquivClassVMap.
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
/// Initialize the three context sets over an empty parameter space: the
/// (assumed) contexts start unconstrained, the invalid context starts empty.
void Scop::buildContext() {
  isl::space Space = isl::space::params_alloc(getIslCtx(), 0);
  Context = isl::set::universe(Space);
  InvalidContext = isl::set::empty(Space);
  AssumedContext = isl::set::universe(Space);
}
/// Tighten the context with the signed range ScalarEvolution derives for each
/// parameter; parameter order in 'Parameters' matches dimension order.
void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    Context = addRangeBoundsToSet(Context, SRange, PDim++, isl::dim::param);
  }
}
2226 static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
2227 std::vector<isl::id> OutermostSizeIds;
2228 for (auto Array : Arrays) {
2229 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2230 // for its outermost dimension. Fortran arrays will have this since the
2231 // outermost dimension size can be picked up from their runtime description.
2232 // TODO: actually need to check if it has a FAD, but for now this works.
2233 if (Array->getNumberOfDimensions() > 0) {
2234 isl::pw_aff PwAff = Array->getDimensionSizePw(0);
2235 if (!PwAff)
2236 continue;
2238 isl::id Id =
2239 isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0));
2240 assert(!Id.is_null() &&
2241 "Invalid Id for PwAff expression in Fortran array");
2242 Id.dump();
2243 OutermostSizeIds.push_back(Id);
2246 return OutermostSizeIds;
2249 // The FORTRAN array size parameters are known to be non-negative.
2250 static isl::set boundFortranArrayParams(isl::set Context,
2251 Scop::array_range Arrays) {
2252 std::vector<isl::id> OutermostSizeIds;
2253 OutermostSizeIds = getFortranArrayIds(Arrays);
2255 for (isl::id Id : OutermostSizeIds) {
2256 int dim = Context.find_dim_by_id(isl::dim::param, Id);
2257 Context = Context.lower_bound_si(isl::dim::param, dim, 0);
2260 return Context;
/// Align all isl objects of this Scop to a common parameter space and bound
/// the parameters.
void Scop::realignParams() {
  // NOTE(review): with -polly-ignore-parameter-bounds this skips the whole
  // realignment (including align_params), not only the bounds — confirm this
  // is intended.
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl::space Space = getFullParamSpace();

  // Align the parameters of all data structures to the model.
  Context = Context.align_params(Space);

  // Bound the size of the fortran array dimensions.
  Context = boundFortranArrayParams(Context, arrays());

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();
  // Simplify the schedule according to the context too.
  Schedule = Schedule.gist_domain_params(getContext());
}
/// Simplify @p AssumptionContext relative to what is already implied by the
/// domains (when no error block exists) and by the Scop's context.
static isl::set simplifyAssumptionContext(isl::set AssumptionContext,
                                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    auto DomainParameters = S.getDomains().params();
    AssumptionContext = AssumptionContext.gist_params(DomainParameters);
  }

  AssumptionContext = AssumptionContext.gist_params(S.getContext());
  return AssumptionContext;
}
/// Simplify the assumed and invalid contexts after all constraints are known.
void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext = InvalidContext.align_params(getParamSpace());
}
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// Returns isl::stat::error when the set is too complex (too many disjuncts
/// or parameters) or when the lexmin/lexmax computation hit the isl
/// computation quota.
static isl::stat
buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;
  isl::ctx Ctx = Set.get_ctx();

  Set = Set.remove_divs();

  if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
    return isl::stat::error;

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return isl::stat::error;
  }

  if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
    return isl::stat::error;

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  // Detect whether the quota aborted the lexmin/lexmax computation.
  if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
    return isl::stat::error;

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return isl::stat::ok;
}
/// Return the parameter-only domain of @p MA's statement: all set dimensions
/// are projected out and the tuple id is dropped.
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain().release();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}
2408 /// Wrapper function to calculate minimal/maximal accesses to each array.
2409 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
2410 Scop::MinMaxVectorTy &MinMaxAccesses) {
2411 MinMaxAccesses.reserve(AliasGroup.size());
2413 isl::union_set Domains = S.getDomains();
2414 isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());
2416 for (MemoryAccess *MA : AliasGroup)
2417 Accesses = Accesses.add_map(give(MA->getAccessRelation().release()));
2419 Accesses = Accesses.intersect_domain(Domains);
2420 isl::union_set Locations = Accesses.range();
2421 Locations = Locations.coalesce();
2422 Locations = Locations.detect_equalities();
2424 auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
2425 return buildMinMaxAccess(Set, MinMaxAccesses, S);
2427 return Locations.foreach_set(Lambda) == isl::stat::ok;
2430 /// Helper to treat non-affine regions and basic blocks the same.
2432 ///{
2434 /// Return the block that is the representing block for @p RN.
2435 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2436 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2437 : RN->getNodeAs<BasicBlock>();
2440 /// Return the @p idx'th block that is executed after @p RN.
2441 static inline BasicBlock *
2442 getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2443 if (RN->isSubRegion()) {
2444 assert(idx == 0);
2445 return RN->getNodeAs<Region>()->getExit();
2447 return TI->getSuccessor(idx);
/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (i > 1024)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  // For a non-affine subregion, return the innermost loop that contains the
  // whole subregion (walk up until the loop is not fully contained anymore).
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();
  return L;
}
2486 /// Get the number of blocks in @p L.
2488 /// The number of blocks in a loop are the number of basic blocks actually
2489 /// belonging to the loop, as well as all single basic blocks that the loop
2490 /// exits to and which terminate in an unreachable instruction. We do not
2491 /// allow such basic blocks in the exit of a scop, hence they belong to the
2492 /// scop and represent run-time conditions which we want to model and
2493 /// subsequently speculate away.
2495 /// @see getRegionNodeLoop for additional details.
2496 unsigned getNumBlocksInLoop(Loop *L) {
2497 unsigned NumBlocks = L->getNumBlocks();
2498 SmallVector<BasicBlock *, 4> ExitBlocks;
2499 L->getExitBlocks(ExitBlocks);
2501 for (auto ExitBlock : ExitBlocks) {
2502 if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2503 NumBlocks++;
2505 return NumBlocks;
2508 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2509 if (!RN->isSubRegion())
2510 return 1;
2512 Region *R = RN->getNodeAs<Region>();
2513 return std::distance(R->block_begin(), R->block_end());
2516 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2517 const DominatorTree &DT) {
2518 if (!RN->isSubRegion())
2519 return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2520 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2521 if (isErrorBlock(*BB, R, LI, DT))
2522 return true;
2523 return false;
2526 ///}
/// Attach the loop @p L as user pointer of an (unnamed) id to set dimension
/// @p Dim of @p Domain, so the loop can later be recovered from the domain.
static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
                                                 unsigned Dim, Loop *L) {
  // NOTE(review): the -1 lower bound is added before setting the id;
  // presumably it keeps the dimension from being simplified away — confirm.
  Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
  isl_id *DimId =
      isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
  return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
}
/// Return the domain constraints for @p Stmt (looked up via its entry block).
isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}

/// Return the domain constraints for @p BB. If @p BB has no own domain (e.g.
/// it lies inside a non-affine subregion), fall back to the domain of the
/// innermost enclosing region whose entry is not @p BB.
isl::set Scop::getDomainConditions(BasicBlock *BB) const {
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return DIt->getSecond();

  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}
/// Compute the iteration domains for all blocks in @p R.
///
/// Starts from a universe domain for the entry (one set dimension per
/// surrounding loop, each tagged with its loop), then propagates branch
/// constraints, domain constraints and invalid-domain information. Returns
/// false if domain construction failed and the region cannot be modeled.
bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD + 1));

  // Tag each set dimension with its loop, innermost first.
  while (LD-- >= 0) {
    S = addDomainDimId(S, LD + 1, L);
    L = L->getParentLoop();
  }

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  DomainMap[EntryBB] = isl::manage(S);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
    return false;

  return true;
}
/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible to domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
                                                  __isl_take isl_set *Dom,
                                                  Loop *OldL, Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left were difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    // Replace the dimension of the left loop with a fresh one for the
    // entered loop, keeping the tuple arity unchanged.
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else {
    assert(OldDepth > NewDepth);
    // Drop one trailing dimension per left loop.
    int Diff = OldDepth - NewDepth;
    int NumDim = isl_set_n_dim(Dom);
    assert(NumDim >= Diff);
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
  }

  return Dom;
}
/// Propagate invalid-domain information through @p R in reverse post order.
///
/// Blocks containing error statements (or only reachable through them) have
/// their execution conditions recorded as an ERRORBLOCK assumption and their
/// domain cleared; the accumulated invalid domain is then pushed to all
/// non-backedge successors inside the SCoP. Returns false if the number of
/// disjuncts in a propagated invalid domain exceeds MaxDisjunctsInDomain.
2644 bool Scop::propagateInvalidStmtDomains(
2645 Region *R, DominatorTree &DT, LoopInfo &LI,
2646 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2647 ReversePostOrderTraversal<Region *> RTraversal(R);
2648 for (auto *RN : RTraversal) {
2650 // Recurse for affine subregions but go on for basic blocks and non-affine
2651 // subregions.
2652 if (RN->isSubRegion()) {
2653 Region *SubRegion = RN->getNodeAs<Region>();
2654 if (!isNonAffineSubRegion(SubRegion)) {
2655 propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
2656 continue;
2660 bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
2661 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2662 isl::set &Domain = DomainMap[BB];
2663 assert(Domain && "Cannot propagate a nullptr");
2665 isl::set InvalidDomain = InvalidDomainMap[BB];
// A block is entirely invalid if it contains an error block or if every
// point of its domain is already known to be invalid.
2667 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
2669 if (!IsInvalidBlock) {
2670 InvalidDomain = InvalidDomain.intersect(Domain);
2671 } else {
// Record under which parameter values the block would have executed and
// drop it from the domain map so no statement is built for it.
2672 InvalidDomain = Domain;
2673 isl::set DomPar = Domain.params();
2674 recordAssumption(ERRORBLOCK, DomPar, BB->getTerminator()->getDebugLoc(),
2675 AS_RESTRICTION);
2676 Domain = nullptr;
2679 if (InvalidDomain.is_empty()) {
2680 InvalidDomainMap[BB] = InvalidDomain;
2681 continue;
// Push the invalid domain to every in-SCoP, non-backedge successor,
// adjusting dimensions for loops entered or left along the edge.
2684 auto *BBLoop = getRegionNodeLoop(RN, LI);
2685 auto *TI = BB->getTerminator();
2686 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
2687 for (unsigned u = 0; u < NumSuccs; u++) {
2688 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2690 // Skip successors outside the SCoP.
2691 if (!contains(SuccBB))
2692 continue;
2694 // Skip backedges.
2695 if (DT.dominates(SuccBB, BB))
2696 continue;
2698 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2700 auto *AdjustedInvalidDomain = adjustDomainDimensions(
2701 *this, InvalidDomain.copy(), BBLoop, SuccBBLoop);
2703 auto *SuccInvalidDomain = InvalidDomainMap[SuccBB].copy();
2704 SuccInvalidDomain =
2705 isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
2706 SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
2707 unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);
2709 InvalidDomainMap[SuccBB] = isl::manage(SuccInvalidDomain);
2711 // Check if the maximal number of domain disjunctions was reached.
2712 // In case this happens we will bail.
2713 if (NumConjucts < MaxDisjunctsInDomain)
2714 continue;
2716 InvalidDomainMap.erase(BB);
2717 invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
2718 return false;
2721 InvalidDomainMap[BB] = InvalidDomain;
2724 return true;
/// If @p BB is the entry of a region, propagate @p BB's domain to the
/// region's exit block and mark the exit as finished in
/// @p FinishedExitBlocks so later condition-set construction can skip it.
2727 void Scop::propagateDomainConstraintsToRegionExit(
2728 BasicBlock *BB, Loop *BBLoop,
2729 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
2730 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2731 // Check if the block @p BB is the entry of a region. If so we propagate it's
2732 // domain to the exit block of the region. Otherwise we are done.
2733 auto *RI = R.getRegionInfo();
2734 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
2735 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
2736 if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
2737 return;
2739 // Do not propagate the domain if there is a loop backedge inside the region
2740 // that would prevent the exit block from being executed.
2741 auto *L = BBLoop;
2742 while (L && contains(L)) {
2743 SmallVector<BasicBlock *, 4> LatchBBs;
// NOTE(review): the latches queried are always BBLoop's, not L's, even
// though the walk advances up the parent chain — confirm this is intended.
2744 BBLoop->getLoopLatches(LatchBBs);
2745 for (auto *LatchBB : LatchBBs)
2746 if (BB != LatchBB && BBReg->contains(LatchBB))
2747 return;
2748 L = L->getParentLoop();
2751 isl::set Domain = DomainMap[BB];
2752 assert(Domain && "Cannot propagate a nullptr");
2754 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());
2756 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2757 // adjust the domain before we can propagate it.
2758 isl::set AdjustedDomain = isl::manage(
2759 adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop));
2760 isl::set &ExitDomain = DomainMap[ExitBB];
2762 // If the exit domain is not yet created we set it otherwise we "add" the
2763 // current domain.
2764 ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
2766 // Initialize the invalid domain.
2767 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
// Remember that the exit's domain is complete from @p BB's perspective.
2769 FinishedExitBlocks.insert(ExitBB);
/// Build the initial domain for every block in @p R by pushing each block's
/// domain, restricted by the branch condition taken, to its successors in
/// reverse post order. Returns false if condition-set construction fails or
/// a domain grows beyond MaxDisjunctsInDomain.
2772 bool Scop::buildDomainsWithBranchConstraints(
2773 Region *R, DominatorTree &DT, LoopInfo &LI,
2774 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2775 // To create the domain for each block in R we iterate over all blocks and
2776 // subregions in R and propagate the conditions under which the current region
2777 // element is executed. To this end we iterate in reverse post order over R as
2778 // it ensures that we first visit all predecessors of a region node (either a
2779 // basic block or a subregion) before we visit the region node itself.
2780 // Initially, only the domain for the SCoP region entry block is set and from
2781 // there we propagate the current domain to all successors, however we add the
2782 // condition that the successor is actually executed next.
2783 // As we are only interested in non-loop carried constraints here we can
2784 // simply skip loop back edges.
2786 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
2787 ReversePostOrderTraversal<Region *> RTraversal(R);
2788 for (auto *RN : RTraversal) {
2789 // Recurse for affine subregions but go on for basic blocks and non-affine
2790 // subregions.
2791 if (RN->isSubRegion()) {
2792 Region *SubRegion = RN->getNodeAs<Region>();
2793 if (!isNonAffineSubRegion(SubRegion)) {
2794 if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
2795 InvalidDomainMap))
2796 return false;
2797 continue;
// Remember that the SCoP contains an error block; the flag is consumed
// later when invalid domains are propagated.
2801 if (containsErrorBlock(RN, getRegion(), LI, DT))
2802 HasErrorBlock = true;
2804 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2805 TerminatorInst *TI = BB->getTerminator();
2807 if (isa<UnreachableInst>(TI))
2808 continue;
// Blocks without a domain are unreachable from the entry; skip them.
2810 isl::set Domain = DomainMap.lookup(BB);
2811 if (!Domain)
2812 continue;
2813 MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));
2815 auto *BBLoop = getRegionNodeLoop(RN, LI);
2816 // Propagate the domain from BB directly to blocks that have a superset
2817 // domain, at the moment only region exit nodes of regions that start in BB.
2818 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
2819 InvalidDomainMap);
2821 // If all successors of BB have been set a domain through the propagation
2822 // above we do not need to build condition sets but can just skip this
2823 // block. However, it is important to note that this is a local property
2824 // with regards to the region @p R. To this end FinishedExitBlocks is a
2825 // local variable.
2826 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
2827 return FinishedExitBlocks.count(SuccBB);
2829 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
2830 continue;
2832 // Build the condition sets for the successor nodes of the current region
2833 // node. If it is a non-affine subregion we will always execute the single
2834 // exit node, hence the single entry node domain is the condition set. For
2835 // basic blocks we use the helper function buildConditionSets.
2836 SmallVector<isl_set *, 8> ConditionSets;
2837 if (RN->isSubRegion())
2838 ConditionSets.push_back(Domain.copy());
2839 else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
2840 InvalidDomainMap, ConditionSets))
2841 return false;
2843 // Now iterate over the successors and set their initial domain based on
2844 // their condition set. We skip back edges here and have to be careful when
2845 // we leave a loop not to keep constraints over a dimension that doesn't
2846 // exist anymore.
2847 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
2848 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
2849 isl::set CondSet = isl::manage(ConditionSets[u]);
2850 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2852 // Skip blocks outside the region.
2853 if (!contains(SuccBB))
2854 continue;
2856 // If we propagate the domain of some block to "SuccBB" we do not have to
2857 // adjust the domain.
2858 if (FinishedExitBlocks.count(SuccBB))
2859 continue;
2861 // Skip back edges.
2862 if (DT.dominates(SuccBB, BB))
2863 continue;
2865 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2867 CondSet = isl::manage(
2868 adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop));
2870 // Set the domain for the successor or merge it with an existing domain in
2871 // case there are multiple paths (without loop back edges) to the
2872 // successor block.
2873 isl::set &SuccDomain = DomainMap[SuccBB];
2875 if (SuccDomain) {
2876 SuccDomain = SuccDomain.unite(CondSet).coalesce();
2877 } else {
2878 // Initialize the invalid domain.
2879 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
2880 SuccDomain = CondSet;
2883 SuccDomain = SuccDomain.detect_equalities();
2885 // Check if the maximal number of domain disjunctions was reached.
2886 // In case this happens we will clean up and bail.
2887 if (isl_set_n_basic_set(SuccDomain.get()) < MaxDisjunctsInDomain)
2888 continue;
// Too complex: free the remaining raw condition sets before bailing so
// no isl objects leak.
2890 invalidate(COMPLEXITY, DebugLoc());
2891 while (++u < ConditionSets.size())
2892 isl_set_free(ConditionSets[u]);
2893 return false;
2897 return true;
2900 isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
2901 DominatorTree &DT,
2902 LoopInfo &LI) {
2903 // If @p BB is the ScopEntry we are done
2904 if (R.getEntry() == BB)
2905 return isl::set::universe(Domain.get_space());
2907 // The region info of this function.
2908 auto &RI = *R.getRegionInfo();
2910 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
2912 // A domain to collect all predecessor domains, thus all conditions under
2913 // which the block is executed. To this end we start with the empty domain.
2914 isl::set PredDom = isl::set::empty(Domain.get_space());
2916 // Set of regions of which the entry block domain has been propagated to BB.
2917 // all predecessors inside any of the regions can be skipped.
2918 SmallSet<Region *, 8> PropagatedRegions;
2920 for (auto *PredBB : predecessors(BB)) {
2921 // Skip backedges.
2922 if (DT.dominates(BB, PredBB))
2923 continue;
2925 // If the predecessor is in a region we used for propagation we can skip it.
2926 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
2927 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
2928 PredBBInRegion)) {
2929 continue;
2932 // Check if there is a valid region we can use for propagation, thus look
2933 // for a region that contains the predecessor and has @p BB as exit block.
2934 auto *PredR = RI.getRegionFor(PredBB);
2935 while (PredR->getExit() != BB && !PredR->contains(BB))
2936 PredR->getParent();
2938 // If a valid region for propagation was found use the entry of that region
2939 // for propagation, otherwise the PredBB directly.
2940 if (PredR->getExit() == BB) {
2941 PredBB = PredR->getEntry();
2942 PropagatedRegions.insert(PredR);
2945 auto *PredBBDom = getDomainConditions(PredBB).release();
2946 Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
2948 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
2950 PredDom = PredDom.unite(isl::manage(PredBBDom));
2953 return PredDom;
/// Tighten each block's domain in @p R by the union of its predecessors'
/// domains, and add loop-carried bounds for loop headers. Requires the
/// initial domains to already be present in DomainMap.
2956 bool Scop::propagateDomainConstraints(
2957 Region *R, DominatorTree &DT, LoopInfo &LI,
2958 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2959 // Iterate over the region R and propagate the domain constrains from the
2960 // predecessors to the current node. In contrast to the
2961 // buildDomainsWithBranchConstraints function, this one will pull the domain
2962 // information from the predecessors instead of pushing it to the successors.
2963 // Additionally, we assume the domains to be already present in the domain
2964 // map here. However, we iterate again in reverse post order so we know all
2965 // predecessors have been visited before a block or non-affine subregion is
2966 // visited.
2968 ReversePostOrderTraversal<Region *> RTraversal(R);
2969 for (auto *RN : RTraversal) {
2970 // Recurse for affine subregions but go on for basic blocks and non-affine
2971 // subregions.
2972 if (RN->isSubRegion()) {
2973 Region *SubRegion = RN->getNodeAs<Region>();
2974 if (!isNonAffineSubRegion(SubRegion)) {
2975 if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
2976 return false;
2977 continue;
2981 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2982 isl::set &Domain = DomainMap[BB];
2983 assert(Domain);
2985 // Under the union of all predecessor conditions we can reach this block.
2986 isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
2987 Domain = Domain.intersect(PredDom).coalesce();
2988 Domain = Domain.align_params(getParamSpace());
// For loop headers inside the SCoP additionally constrain the domain by
// the loop's backedge-taken conditions.
2990 Loop *BBLoop = getRegionNodeLoop(RN, LI);
2991 if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
2992 if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
2993 return false;
2996 return true;
2999 /// Create a map to map from a given iteration to a subsequent iteration.
3001 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3002 /// is incremented by one and all other dimensions are equal, e.g.,
3003 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3005 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3006 static __isl_give isl_map *
3007 createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
3008 auto *MapSpace = isl_space_map_from_set(SetSpace);
3009 auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
// Equate every dimension except @p Dim between input and output tuple.
3010 for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
3011 if (u != Dim)
3012 NextIterationMap =
3013 isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
// Add the equality in[Dim] + 1 - out[Dim] == 0, i.e. out[Dim] = in[Dim] + 1.
// isl_local_space_from_space consumes the last reference to MapSpace.
3014 auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
3015 C = isl_constraint_set_constant_si(C, 1);
3016 C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
3017 C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
3018 NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
3019 return NextIterationMap;
3022 bool Scop::addLoopBoundsToHeaderDomain(
3023 Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3024 int LoopDepth = getRelativeLoopDepth(L);
3025 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
3027 BasicBlock *HeaderBB = L->getHeader();
3028 assert(DomainMap.count(HeaderBB));
3029 isl::set &HeaderBBDom = DomainMap[HeaderBB];
3031 isl::map NextIterationMap = isl::manage(
3032 createNextIterationMap(HeaderBBDom.get_space().release(), LoopDepth));
3034 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
3036 SmallVector<BasicBlock *, 4> LatchBlocks;
3037 L->getLoopLatches(LatchBlocks);
3039 for (BasicBlock *LatchBB : LatchBlocks) {
3040 // If the latch is only reachable via error statements we skip it.
3041 isl::set LatchBBDom = DomainMap.lookup(LatchBB);
3042 if (!LatchBBDom)
3043 continue;
3045 isl::set BackedgeCondition = nullptr;
3047 TerminatorInst *TI = LatchBB->getTerminator();
3048 BranchInst *BI = dyn_cast<BranchInst>(TI);
3049 assert(BI && "Only branch instructions allowed in loop latches");
3051 if (BI->isUnconditional())
3052 BackedgeCondition = LatchBBDom;
3053 else {
3054 SmallVector<isl_set *, 8> ConditionSets;
3055 int idx = BI->getSuccessor(0) != HeaderBB;
3056 if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
3057 InvalidDomainMap, ConditionSets))
3058 return false;
3060 // Free the non back edge condition set as we do not need it.
3061 isl_set_free(ConditionSets[1 - idx]);
3063 BackedgeCondition = isl::manage(ConditionSets[idx]);
3066 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3067 assert(LatchLoopDepth >= LoopDepth);
3068 BackedgeCondition = BackedgeCondition.project_out(
3069 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3070 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3073 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
3074 for (int i = 0; i < LoopDepth; i++)
3075 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3077 isl::set UnionBackedgeConditionComplement =
3078 UnionBackedgeCondition.complement();
3079 UnionBackedgeConditionComplement =
3080 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
3082 UnionBackedgeConditionComplement =
3083 UnionBackedgeConditionComplement.apply(ForwardMap);
3084 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3085 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3087 auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth);
3088 HeaderBBDom = isl::manage(Parts.second);
3090 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3091 // the bounded assumptions to the context as they are already implied by the
3092 // <nsw> tag.
3093 if (Affinator.hasNSWAddRecForLoop(L)) {
3094 isl_set_free(Parts.first);
3095 return true;
3098 isl::set UnboundedCtx = isl::manage(Parts.first).params();
3099 recordAssumption(INFINITELOOP, UnboundedCtx,
3100 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3101 return true;
3104 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3105 Value *PointerBase = MA->getOriginalBaseAddr();
3107 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3108 if (!PointerBaseInst)
3109 return nullptr;
3111 auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3112 if (!BasePtrStmt)
3113 return nullptr;
3115 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3118 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3119 isl::union_map Writes) {
3120 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3121 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3124 Value *BaseAddr = MA->getOriginalBaseAddr();
3125 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3126 if (!isa<LoadInst>(BasePtrInst))
3127 return contains(BasePtrInst);
3129 return false;
3132 bool Scop::buildAliasChecks(AliasAnalysis &AA) {
3133 if (!PollyUseRuntimeAliasChecks)
3134 return true;
3136 if (buildAliasGroups(AA)) {
3137 // Aliasing assumptions do not go through addAssumption but we still want to
3138 // collect statistics so we do it here explicitly.
3139 if (MinMaxAliasGroups.size())
3140 AssumptionsAliasing++;
3141 return true;
3144 // If a problem occurs while building the alias groups we need to delete
3145 // this SCoP and pretend it wasn't valid in the first place. To this end
3146 // we make the assumed context infeasible.
3147 invalidate(ALIASING, DebugLoc());
3149 DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3150 << " could not be created as the number of parameters involved "
3151 "is too high. The SCoP will be "
3152 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3153 "the maximal number of parameters but be advised that the "
3154 "compile time might increase exponentially.\n\n");
3155 return false;
/// Partition the SCoP's array accesses into may-alias groups using alias
/// analysis, and collect the set of arrays that are written. Groups with
/// fewer than two accesses need no runtime check and are dropped.
3158 std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3159 Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
3160 AliasSetTracker AST(AA);
3162 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3163 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3164 for (ScopStmt &Stmt : *this) {
3166 isl_set *StmtDomain = Stmt.getDomain().release();
3167 bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
3168 isl_set_free(StmtDomain);
3170 // Statements with an empty domain will never be executed.
3171 if (StmtDomainEmpty)
3172 continue;
3174 for (MemoryAccess *MA : Stmt) {
3175 if (MA->isScalarKind())
3176 continue;
3177 if (!MA->isRead())
3178 HasWriteAccess.insert(MA->getScopArrayInfo());
3179 MemAccInst Acc(MA->getAccessInstruction());
// For memory transfers the read side is keyed by the source pointer so
// both ends of the transfer are tracked.
3180 if (MA->isRead() && isa<MemTransferInst>(Acc))
3181 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3182 else
3183 PtrToAcc[Acc.getPointerOperand()] = MA;
3184 AST.add(Acc);
// Turn the alias sets into access groups; must-alias and forwarding sets
// cannot cause a runtime conflict and are skipped.
3188 AliasGroupVectorTy AliasGroups;
3189 for (AliasSet &AS : AST) {
3190 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3191 continue;
3192 AliasGroupTy AG;
3193 for (auto &PR : AS)
3194 AG.push_back(PtrToAcc[PR.getValue()]);
3195 if (AG.size() < 2)
3196 continue;
3197 AliasGroups.push_back(std::move(AG));
3200 return std::make_tuple(AliasGroups, HasWriteAccess);
/// Split alias groups into subsets whose access domains overlap.
///
/// Accesses whose domains are disjoint from the rest of their group can
/// never conflict at runtime, so they are moved into a new group (appended
/// to @p AliasGroups and processed by the same loop later).
3203 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3204 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3205 AliasGroupTy NewAG;
3206 AliasGroupTy &AG = AliasGroups[u];
3207 AliasGroupTy::iterator AGI = AG.begin();
3208 isl_set *AGDomain = getAccessDomain(*AGI);
3209 while (AGI != AG.end()) {
3210 MemoryAccess *MA = *AGI;
3211 isl_set *MADomain = getAccessDomain(MA);
3212 if (isl_set_is_disjoint(AGDomain, MADomain)) {
// Disjoint: move the access to the new group; its domain is not needed.
3213 NewAG.push_back(MA);
3214 AGI = AG.erase(AGI);
3215 isl_set_free(MADomain);
3216 } else {
// Overlapping: grow the group's combined domain (union consumes MADomain).
3217 AGDomain = isl_set_union(AGDomain, MADomain);
3218 AGI++;
// Only groups with at least two members can require a runtime check.
3221 if (NewAG.size() > 1)
3222 AliasGroups.push_back(std::move(NewAG));
3223 isl_set_free(AGDomain);
/// Build all alias groups and their min/max access summaries.
///
/// Returns false if a group cannot be handled, the runtime context becomes
/// infeasible, or isl's operation quota is exceeded.
3227 bool Scop::buildAliasGroups(AliasAnalysis &AA) {
3228 // To create sound alias checks we perform the following steps:
3229 // o) We partition each group into read only and non read only accesses.
3230 // o) For each group with more than one base pointer we then compute minimal
3231 // and maximal accesses to each array of a group in read only and non
3232 // read only partitions separately.
3233 AliasGroupVectorTy AliasGroups;
3234 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3236 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
3238 splitAliasGroupsByDomain(AliasGroups);
3240 for (AliasGroupTy &AG : AliasGroups) {
3241 if (!hasFeasibleRuntimeContext())
3242 return false;
// Bound the isl work spent per group; a quota hit is detected below.
3245 IslMaxOperationsGuard MaxOpGuard(getIslCtx().get(), OptComputeOut);
3246 bool Valid = buildAliasGroup(AG, HasWriteAccess);
3247 if (!Valid)
3248 return false;
3250 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota) {
3251 invalidate(COMPLEXITY, DebugLoc());
3252 return false;
3256 return true;
/// Build the min/max access summary for a single alias group.
///
/// The group is split into read-only and read-write partitions; a runtime
/// check is emitted only if it can actually exclude a conflict (at least one
/// read-write array, and either a second read-write array or a read-only
/// one). Returns false if the group cannot be handled (non-affine access,
/// failed min/max computation, or too many values to compare).
3259 bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
3260 DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3261 AliasGroupTy ReadOnlyAccesses;
3262 AliasGroupTy ReadWriteAccesses;
3263 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3264 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3266 if (AliasGroup.size() < 2)
3267 return true;
3269 for (MemoryAccess *Access : AliasGroup) {
3270 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3271 Access->getAccessInstruction())
3272 << "Possibly aliasing pointer, use restrict keyword.");
3273 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3274 if (HasWriteAccess.count(Array)) {
3275 ReadWriteArrays.insert(Array);
3276 ReadWriteAccesses.push_back(Access);
3277 } else {
3278 ReadOnlyArrays.insert(Array);
3279 ReadOnlyAccesses.push_back(Access);
3283 // If there are no read-only pointers, and less than two read-write pointers,
3284 // no alias check is needed.
3285 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3286 return true;
3288 // If there is no read-write pointer, no alias check is needed.
3289 if (ReadWriteArrays.empty())
3290 return true;
3292 // For non-affine accesses, no alias check can be generated as we cannot
3293 // compute a sufficiently tight lower and upper bound: bail out.
3294 for (MemoryAccess *MA : AliasGroup) {
3295 if (!MA->isAffine()) {
3296 invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3297 MA->getAccessInstruction()->getParent());
3298 return false;
3302 // Ensure that for all memory accesses for which we generate alias checks,
3303 // their base pointers are available.
3304 for (MemoryAccess *MA : AliasGroup) {
3305 if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
3306 addRequiredInvariantLoad(
3307 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
// Record the group's min/max accesses: first entry of the pair is the
// read-write partition, second the read-only partition.
3310 MinMaxAliasGroups.emplace_back();
3311 MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
3312 MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
3313 MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
3315 bool Valid;
3317 Valid =
3318 calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
3320 if (!Valid)
3321 return false;
3323 // Bail out if the number of values we need to compare is too large.
3324 // This is important as the number of comparisons grows quadratically with
3325 // the number of values we need to compare.
3326 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3327 RunTimeChecksMaxArraysPerGroup)
3328 return false;
3330 Valid =
3331 calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
3333 if (!Valid)
3334 return false;
3336 return true;
3339 /// Get the smallest loop that contains @p S but is not in @p S.
3340 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3341 // Start with the smallest loop containing the entry and expand that
3342 // loop until it contains all blocks in the region. If there is a loop
3343 // containing all blocks in the region check if it is itself contained
3344 // and if so take the parent loop as it will be the smallest containing
3345 // the region but not contained by it.
3346 Loop *L = LI.getLoopFor(S.getEntry());
3347 while (L) {
3348 bool AllContained = true;
3349 for (auto *BB : S.blocks())
3350 AllContained &= L->contains(BB);
3351 if (AllContained)
3352 break;
3353 L = L->getParentLoop();
3356 return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
// Sequential SCoP id counter, reset whenever a new function is entered.
3359 int Scop::NextScopID = 0;
// Name of the function whose SCoPs are currently being numbered.
3361 std::string Scop::CurrentFunc;
/// Return the next SCoP id for @p ParentFunc, restarting the counter at 0
/// when a different function than the previous call is seen.
3363 int Scop::getNextID(std::string ParentFunc) {
3364 if (ParentFunc != CurrentFunc) {
3365 CurrentFunc = ParentFunc;
3366 NextScopID = 0;
3368 return NextScopID++;
/// Construct a SCoP for region @p R. Allocates a fresh isl context (freed
/// automatically via the shared_ptr deleter) and builds the initial context.
3371 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3372 DominatorTree &DT, ScopDetection::DetectionContext &DC,
3373 OptimizationRemarkEmitter &ORE)
3374 : IslCtx(isl_ctx_alloc(), isl_ctx_free), SE(&ScalarEvolution), DT(&DT),
3375 R(R), name(R.getNameStr()), HasSingleExitEdge(R.getExitingBlock()),
3376 DC(DC), ORE(ORE), Affinator(this, LI),
3377 ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
// Abort on isl errors when requested; eases debugging of isl misuse.
3378 if (IslOnErrorAbort)
3379 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT);
3380 buildContext();
// All members clean up via RAII; nothing to do explicitly.
3383 Scop::~Scop() = default;
/// Fold constant strides of multi-dimensional arrays into the rightmost
/// (innermost) dimension.
///
/// If every accessed index of a dimension is a multiple of a constant
/// stride, the access functions are rewritten to divide that dimension by
/// the stride and the innermost dimension size is multiplied accordingly.
3385 void Scop::foldSizeConstantsToRight() {
3386 isl_union_set *Accessed = isl_union_map_range(getAccesses().release());
3388 for (auto Array : arrays()) {
3389 if (Array->getNumberOfDimensions() <= 1)
3390 continue;
3392 isl_space *Space = Array->getSpace().release();
3394 Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
// Arrays that are never accessed have nothing to fold.
3396 if (!isl_union_set_contains(Accessed, Space)) {
3397 isl_space_free(Space);
3398 continue;
3401 isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
3403 isl_map *Transform =
3404 isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));
// Int[i] collects the detected stride of dimension i (1 = no stride,
// 0 = dimension is constantly zero).
3406 std::vector<int> Int;
3408 int Dims = isl_set_dim(Elements, isl_dim_set);
3409 for (int i = 0; i < Dims; i++) {
// Isolate dimension i of the accessed elements and take its affine hull
// to expose stride information as a single existential (div) dimension.
3410 isl_set *DimOnly =
3411 isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3412 DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3413 DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3415 isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
// The innermost dimension is the fold target and is kept as-is.
3417 if (i == Dims - 1) {
3418 Int.push_back(1);
3419 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3420 isl_basic_set_free(DimHull);
3421 continue;
// A single div in the hull indicates a constant stride: divide the
// dimension by it in the transform (out * stride == in).
3424 if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3425 isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3426 isl_val *Val = isl_aff_get_denominator_val(Diff);
3427 isl_aff_free(Diff);
3429 int ValInt = 1;
3431 if (isl_val_is_int(Val))
3432 ValInt = isl_val_get_num_si(Val);
3433 isl_val_free(Val);
3435 Int.push_back(ValInt);
3437 isl_constraint *C = isl_constraint_alloc_equality(
3438 isl_local_space_from_space(isl_map_get_space(Transform)));
3439 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3440 C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3441 Transform = isl_map_add_constraint(Transform, C);
3442 isl_basic_set_free(DimHull);
3443 continue;
// No div: check whether the dimension is constantly zero.
3446 isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3447 ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3449 int ValInt = 1;
3450 if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3451 ValInt = 0;
3454 Int.push_back(ValInt);
3455 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3456 isl_basic_set_free(DimHull);
3457 isl_basic_set_free(ZeroSet);
// The transform is only sound if every accessed element is in its domain.
3460 isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3462 if (!isl_set_is_subset(Elements, MappedElements)) {
3463 isl_set_free(Elements);
3464 isl_set_free(MappedElements);
3465 isl_map_free(Transform);
3466 continue;
3469 isl_set_free(MappedElements);
// Fold only if the outermost stride is > 1 and all folded dimensions
// share the same stride (or are constantly zero).
3471 bool CanFold = true;
3473 if (Int[0] <= 1)
3474 CanFold = false;
3476 unsigned NumDims = Array->getNumberOfDimensions();
3477 for (unsigned i = 1; i < NumDims - 1; i++)
3478 if (Int[0] != Int[i] && Int[i])
3479 CanFold = false;
3481 if (!CanFold) {
3482 isl_set_free(Elements);
3483 isl_map_free(Transform);
3484 continue;
// Rewrite every access function of this array through the transform.
3487 for (auto &Access : AccessFunctions)
3488 if (Access->getScopArrayInfo() == Array)
3489 Access->setAccessRelation(Access->getAccessRelation().apply_range(
3490 isl::manage(isl_map_copy(Transform))));
3492 isl_map_free(Transform);
// Scale the innermost dimension size by the folded stride.
3494 std::vector<const SCEV *> Sizes;
3495 for (unsigned i = 0; i < NumDims; i++) {
3496 auto Size = Array->getDimensionSize(i);
3498 if (i == NumDims - 1)
3499 Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3500 Sizes.push_back(Size);
3503 Array->updateSizes(Sizes, false /* CheckConsistency */);
3505 isl_set_free(Elements);
3507 isl_union_set_free(Accessed);
/// Attach Fortran array descriptors to the ScopArrayInfo objects of all
/// accesses that carry one.
3510 void Scop::markFortranArrays() {
3511 for (ScopStmt &Stmt : Stmts) {
3512 for (MemoryAccess *MemAcc : Stmt) {
3513 Value *FAD = MemAcc->getFortranArrayDescriptor();
3514 if (!FAD)
3515 continue;
3517 // TODO: const_cast-ing to edit
3518 ScopArrayInfo *SAI =
3519 const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3520 assert(SAI && "memory access into a Fortran array does not "
3521 "have an associated ScopArrayInfo");
3522 SAI->applyAndSetFAD(FAD);
/// Run the final access-relation passes. The order matters: dimensionality
/// must be settled before folding, and out-of-bounds assumptions rely on the
/// folded relations.
3527 void Scop::finalizeAccesses() {
3528 updateAccessDimensionality();
3529 foldSizeConstantsToRight();
3530 foldAccessRelations();
3531 assumeNoOutOfBounds();
3532 markFortranArrays();
void Scop::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *this)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      // TODO: const_cast-ing to edit the shared array object.
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      // Only single-dimensional arrays are refined here.
      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      // Halve the candidate element size until it divides the subscript
      // expression of this access.
      while (!isDivisible(Subscript, DivisibleSize, *SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  // Propagate the (possibly changed) element types into every access relation.
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
3560 void Scop::foldAccessRelations() {
3561 for (auto &Stmt : *this)
3562 for (auto &Access : Stmt)
3563 Access->foldAccessRelation();
3566 void Scop::assumeNoOutOfBounds() {
3567 for (auto &Stmt : *this)
3568 for (auto &Access : Stmt)
3569 Access->assumeNoOutOfBound();
3572 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3573 for (Instruction *Inst : Stmt.getInstructions())
3574 InstStmtMap.erase(Inst);
3576 if (Stmt.isRegionStmt()) {
3577 for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
3578 StmtMap.erase(BB);
3579 // Skip entry basic block, as its instructions are already deleted as
3580 // part of the statement's instruction list.
3581 if (BB == Stmt.getEntryBlock())
3582 continue;
3583 for (Instruction &Inst : *BB)
3584 InstStmtMap.erase(&Inst);
3586 } else {
3587 auto StmtMapIt = StmtMap.find(Stmt.getBasicBlock());
3588 if (StmtMapIt != StmtMap.end())
3589 StmtMapIt->second.erase(std::remove(StmtMapIt->second.begin(),
3590 StmtMapIt->second.end(), &Stmt),
3591 StmtMapIt->second.end());
3592 for (Instruction *Inst : Stmt.getInstructions())
3593 InstStmtMap.erase(Inst);
3597 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
3598 for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3599 if (!ShouldDelete(*StmtIt)) {
3600 StmtIt++;
3601 continue;
3604 removeFromStmtMap(*StmtIt);
3605 StmtIt = Stmts.erase(StmtIt);
3609 void Scop::removeStmtNotInDomainMap() {
3610 auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3611 return !this->DomainMap.lookup(Stmt.getEntryBlock());
3613 removeStmts(ShouldDelete);
3616 void Scop::simplifySCoP(bool AfterHoisting) {
3617 auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3618 bool RemoveStmt = Stmt.isEmpty();
3620 // Remove read only statements only after invariant load hoisting.
3621 if (!RemoveStmt && AfterHoisting) {
3622 bool OnlyRead = true;
3623 for (MemoryAccess *MA : Stmt) {
3624 if (MA->isRead())
3625 continue;
3627 OnlyRead = false;
3628 break;
3631 RemoveStmt = OnlyRead;
3633 return RemoveStmt;
3636 removeStmts(ShouldDelete);
/// Return the invariant-load equivalence class containing @p Val, or nullptr
/// if @p Val is not (mapped to) a hoisted invariant load.
InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
  // Only load instructions can be members of an equivalence class.
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
  if (!LInst)
    return nullptr;

  // If the load was mapped to a representative, search for the class of the
  // representative instead.
  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
    LInst = cast<LoadInst>(Rep);

  // Classes are identified by (pointer SCEV, loaded type).
  Type *Ty = LInst->getType();
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
  for (auto &IAClass : InvariantEquivClasses) {
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
      continue;

    // Only report the class if @p Val itself is one of its member accesses.
    auto &MAs = IAClass.InvariantAccesses;
    for (auto *MA : MAs)
      if (MA->getAccessInstruction() == Val)
        return &IAClass;
  }

  return nullptr;
}
/// Check whether @p maybeParam is one of the formal arguments of @p F.
bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  for (const llvm::Argument &Arg : F.args())
    if (&Arg == maybeParam)
      return true;

  return false;
}
/// Return true if the load access @p MA can be hoisted unconditionally, i.e.
/// without guarding its execution by a run-time context.
///
/// @param MA                     The load access in question.
/// @param StmtInvalidCtxIsEmpty  True if the invalid context of the access's
///                               statement is empty.
/// @param MAInvalidCtxIsEmpty    True if the access's own invalid context is
///                               empty.
/// @param NonHoistableCtxIsEmpty True if the context under which the access
///                               is not hoistable is empty.
bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                              bool MAInvalidCtxIsEmpty,
                              bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  // If the user asserted that all function parameters are dereferenceable,
  // loads from function parameters can always be speculated.
  if (PollyAllowDereferenceOfAllFunctionParams &&
      isAParameter(LInst->getPointerOperand(), getFunction()))
    return true;

  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
                                          LInst->getAlignment(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if it
  // does not involve any parameters that might have been specialized by the
  // statement domain.
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
      return false;
  return true;
}
/// Add the invariant accesses @p InvMAs (removed from @p Stmt by the caller)
/// to the SCoP-wide invariant-load equivalence classes, consolidating
/// accesses of the same location into one class.
void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  // Invalidate the SCoP if the execution context would become too complex.
  if (isl_set_n_basic_set(DomainCtx.get()) >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement. Otherwise
  // we could have cyclic dependences on the constraints under which the
  // hoisted loads are executed and we could not determine an order in which to
  // pre-load them. This happens because not only lower bounds are part of the
  // domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE->isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : Parameters) {
        Values.clear();
        findValues(Parameter, *SE, Values);
        if (!Values.count(AccInst))
          continue;

        // Eliminate the parameter dimension derived from this load from the
        // execution context.
        if (isl::id ParamId = getIdForParam(Parameter)) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : InvariantEquivClasses) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct part of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (IAClassDomainCtx)
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}
/// Check if an access range is too complex.
///
/// An access range is too complex, if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we assume a set to
/// be too complex if the sum of existentially quantified dimensions and
/// set dimensions is larger than a threshold. This reliably detects both
/// sets with many disjuncts as well as sets with many divisions as they
/// arise in h264.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
static bool isAccessRangeTooComplex(isl::set AccessRange) {
  unsigned NumTotalDims = 0;

  // Sum the div (existentially quantified) and set dimensions over all basic
  // sets of the range.
  auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
    NumTotalDims += BSet.dim(isl::dim::div);
    NumTotalDims += BSet.dim(isl::dim::set);
    return isl::stat::ok;
  };

  AccessRange.foreach_basic_set(CountDimensions);

  if (NumTotalDims > MaxDimensionsInAccessRange)
    return true;

  return false;
}
3850 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
3851 // TODO: Loads that are not loop carried, hence are in a statement with
3852 // zero iterators, are by construction invariant, though we
3853 // currently "hoist" them anyway. This is necessary because we allow
3854 // them to be treated as parameters (e.g., in conditions) and our code
3855 // generation would otherwise use the old value.
3857 auto &Stmt = *Access->getStatement();
3858 BasicBlock *BB = Stmt.getEntryBlock();
3860 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
3861 Access->isMemoryIntrinsic())
3862 return nullptr;
3864 // Skip accesses that have an invariant base pointer which is defined but
3865 // not loaded inside the SCoP. This can happened e.g., if a readnone call
3866 // returns a pointer that is used as a base address. However, as we want
3867 // to hoist indirect pointers, we allow the base pointer to be defined in
3868 // the region if it is also a memory access. Each ScopArrayInfo object
3869 // that has a base pointer origin has a base pointer that is loaded and
3870 // that it is invariant, thus it will be hoisted too. However, if there is
3871 // no base pointer origin we check that the base pointer is defined
3872 // outside the region.
3873 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
3874 if (hasNonHoistableBasePtrInScop(Access, Writes))
3875 return nullptr;
3877 isl::map AccessRelation = give(Access->getAccessRelation().release());
3878 assert(!AccessRelation.is_empty());
3880 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
3881 return nullptr;
3883 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
3884 isl::set SafeToLoad;
3886 auto &DL = getFunction().getParent()->getDataLayout();
3887 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
3888 DL)) {
3889 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
3890 } else if (BB != LI->getParent()) {
3891 // Skip accesses in non-affine subregions as they might not be executed
3892 // under the same condition as the entry of the non-affine subregion.
3893 return nullptr;
3894 } else {
3895 SafeToLoad = AccessRelation.range();
3898 if (isAccessRangeTooComplex(AccessRelation.range()))
3899 return nullptr;
3901 isl::union_map Written = Writes.intersect_range(SafeToLoad);
3902 isl::set WrittenCtx = Written.params();
3903 bool IsWritten = !WrittenCtx.is_empty();
3905 if (!IsWritten)
3906 return WrittenCtx;
3908 WrittenCtx = WrittenCtx.remove_divs();
3909 bool TooComplex =
3910 isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
3911 if (TooComplex || !isRequiredInvariantLoad(LI))
3912 return nullptr;
3914 addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), AS_RESTRICTION,
3915 LI->getParent());
3916 return WrittenCtx;
3919 void Scop::verifyInvariantLoads() {
3920 auto &RIL = getRequiredInvariantLoads();
3921 for (LoadInst *LI : RIL) {
3922 assert(LI && contains(LI));
3923 // If there exists a statement in the scop which has a memory access for
3924 // @p LI, then mark this scop as infeasible for optimization.
3925 for (ScopStmt &Stmt : Stmts)
3926 if (Stmt.getArrayAccessOrNULLFor(LI)) {
3927 invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
3928 return;
3933 void Scop::hoistInvariantLoads() {
3934 if (!PollyInvariantLoadHoisting)
3935 return;
3937 isl::union_map Writes = getWrites();
3938 for (ScopStmt &Stmt : *this) {
3939 InvariantAccessesTy InvariantAccesses;
3941 for (MemoryAccess *Access : Stmt)
3942 if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
3943 InvariantAccesses.push_back({Access, NHCtx});
3945 // Transfer the memory access from the statement to the SCoP.
3946 for (auto InvMA : InvariantAccesses)
3947 Stmt.removeMemoryAccess(InvMA.MA);
3948 addInvariantLoads(Stmt, InvariantAccesses);
3952 /// Find the canonical scop array info object for a set of invariant load
3953 /// hoisted loads. The canonical array is the one that corresponds to the
3954 /// first load in the list of accesses which is used as base pointer of a
3955 /// scop array.
3956 static const ScopArrayInfo *findCanonicalArray(Scop *S,
3957 MemoryAccessList &Accesses) {
3958 for (MemoryAccess *Access : Accesses) {
3959 const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
3960 Access->getAccessInstruction(), MemoryKind::Array);
3961 if (CanonicalArray)
3962 return CanonicalArray;
3964 return nullptr;
/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}
3976 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3977 /// with a reference to @p New.
3978 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
3979 const ScopArrayInfo *New) {
3980 for (ScopStmt &Stmt : *S)
3981 for (MemoryAccess *Access : Stmt) {
3982 if (Access->getLatestScopArrayInfo() != Old)
3983 continue;
3985 isl::id Id = New->getBasePtrId();
3986 isl::map Map = Access->getAccessRelation();
3987 Map = Map.set_tuple_id(isl::dim::out, Id);
3988 Access->setAccessRelation(Map);
/// Make all arrays whose base pointers are loaded from the same location use
/// a single canonical ScopArrayInfo object.
void Scop::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(this, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      // Only replace compatible, distinct arrays.
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
        continue;

      replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
/// Return the ScopArrayInfo for the given base pointer (or name), creating it
/// on first use.
///
/// @param BasePtr     The base pointer of the array, or nullptr for arrays
///                    identified by @p BaseName only.
/// @param ElementType The element type to record for the array.
/// @param Sizes       The dimension sizes of the array.
/// @param Kind        The kind of memory the array models.
/// @param BaseName    Name for a BasePtr-less array; mutually exclusive with
///                    @p BasePtr.
ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
                                              ArrayRef<const SCEV *> Sizes,
                                              MemoryKind Kind,
                                              const char *BaseName) {
  assert((BasePtr || BaseName) &&
         "BasePtr and BaseName can not be nullptr at the same time.");
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
  // operator[] creates an empty slot on first use.
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
                      : ScopArrayNameMap[BaseName];
  if (!SAI) {
    auto &DL = getFunction().getParent()->getDataLayout();
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
                                DL, this, BaseName));
    ScopArrayInfoSet.insert(SAI.get());
  } else {
    SAI->updateElementType(ElementType);
    // In case of mismatching array sizes, we bail out by setting the run-time
    // context to false.
    if (!SAI->updateSizes(Sizes))
      invalidate(DELINEARIZATION, DebugLoc());
  }
  return SAI.get();
}
4046 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
4047 const std::string &BaseName,
4048 const std::vector<unsigned> &Sizes) {
4049 auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
4050 std::vector<const SCEV *> SCEVSizes;
4052 for (auto size : Sizes)
4053 if (size)
4054 SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
4055 else
4056 SCEVSizes.push_back(nullptr);
4058 auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
4059 MemoryKind::Array, BaseName.c_str());
4060 return SAI;
4063 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
4064 MemoryKind Kind) {
4065 auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
4066 return SAI;
4069 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
4070 auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
4071 assert(SAI && "No ScopArrayInfo available for this base pointer");
4072 return SAI;
/// Return the defined-behavior context as a string.
std::string Scop::getContextStr() const { return getContext().to_str(); }

/// Return the assumed context as a string; it must have been built already.
std::string Scop::getAssumedContextStr() const {
  assert(AssumedContext && "Assumed context not yet built");
  return AssumedContext.to_str();
}

/// Return the invalid context as a string.
std::string Scop::getInvalidContextStr() const {
  return InvalidContext.to_str();
}
4086 std::string Scop::getNameStr() const {
4087 std::string ExitName, EntryName;
4088 std::tie(EntryName, ExitName) = getEntryExitStr();
4089 return EntryName + "---" + ExitName;
4092 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4093 std::string ExitName, EntryName;
4094 raw_string_ostream ExitStr(ExitName);
4095 raw_string_ostream EntryStr(EntryName);
4097 R.getEntry()->printAsOperand(EntryStr, false);
4098 EntryStr.str();
4100 if (R.getExit()) {
4101 R.getExit()->printAsOperand(ExitStr, false);
4102 ExitStr.str();
4103 } else
4104 ExitName = "FunctionExit";
4106 return std::make_pair(EntryName, ExitName);
/// Return the constraints on the parameters under which the SCoP is executed.
isl::set Scop::getContext() const { return Context; }

/// Return the parameter space of the context.
isl::space Scop::getParamSpace() const { return getContext().get_space(); }
/// Return a parameter space covering all parameters of this SCoP plus the
/// Fortran array ids, in a fixed order: SCEV parameters first, Fortran ids
/// after.
isl::space Scop::getFullParamSpace() const {
  std::vector<isl::id> FortranIDs;
  FortranIDs = getFortranArrayIds(arrays());

  isl::space Space = isl::space::params_alloc(
      getIslCtx(), ParameterIds.size() + FortranIDs.size());

  // First the SCEV-derived parameters ...
  unsigned PDim = 0;
  for (const SCEV *Parameter : Parameters) {
    isl::id Id = getIdForParam(Parameter);
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
  }

  // ... then the Fortran array ids.
  for (isl::id Id : FortranIDs)
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);

  return Space;
}
/// Return the parameter constraints assumed for a correct execution; the
/// assumed context must have been built before.
isl::set Scop::getAssumedContext() const {
  assert(AssumedContext && "Assumed context not yet built");
  return AssumedContext;
}
4136 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4137 if (PollyProcessUnprofitable)
4138 return true;
4140 if (isEmpty())
4141 return false;
4143 unsigned OptimizableStmtsOrLoops = 0;
4144 for (auto &Stmt : *this) {
4145 if (Stmt.getNumIterators() == 0)
4146 continue;
4148 bool ContainsArrayAccs = false;
4149 bool ContainsScalarAccs = false;
4150 for (auto *MA : Stmt) {
4151 if (MA->isRead())
4152 continue;
4153 ContainsArrayAccs |= MA->isLatestArrayKind();
4154 ContainsScalarAccs |= MA->isLatestScalarKind();
4157 if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4158 OptimizableStmtsOrLoops += Stmt.getNumIterators();
4161 return OptimizableStmtsOrLoops > 1;
/// Check whether a parameter valuation exists under which the SCoP's
/// assumptions hold and its invalid context is avoided.
bool Scop::hasFeasibleRuntimeContext() const {
  auto PositiveContext = getAssumedContext();
  auto NegativeContext = getInvalidContext();
  PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
  // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
  if (!PositiveContext)
    return false;

  bool IsFeasible = !(PositiveContext.is_empty() ||
                      PositiveContext.is_subset(NegativeContext));
  if (!IsFeasible)
    return false;

  // Neither the domain parameters nor the defined-behavior context may be
  // fully contained in the invalid context.
  auto DomainContext = getDomains().params();
  IsFeasible = !DomainContext.is_subset(NegativeContext);
  IsFeasible &= !Context.is_subset(NegativeContext);

  return IsFeasible;
}
/// Map an AssumptionKind to a human readable label for remarks.
static std::string toString(AssumptionKind Kind) {
  switch (Kind) {
  case ALIASING:
    return "No-aliasing";
  case INBOUNDS:
    return "Inbounds";
  case WRAPPING:
    return "No-overflows";
  case UNSIGNED:
    return "Signed-unsigned";
  case COMPLEXITY:
    return "Low complexity";
  case PROFITABLE:
    return "Profitable";
  case ERRORBLOCK:
    return "No-error";
  case INFINITELOOP:
    return "Finite loop";
  case INVARIANTLOAD:
    return "Invariant load";
  case DELINEARIZATION:
    return "Delinearization";
  }
  llvm_unreachable("Unknown AssumptionKind!");
}
4210 bool Scop::isEffectiveAssumption(isl::set Set, AssumptionSign Sign) {
4211 if (Sign == AS_ASSUMPTION) {
4212 if (Context.is_subset(Set))
4213 return false;
4215 if (AssumedContext.is_subset(Set))
4216 return false;
4217 } else {
4218 if (Set.is_disjoint(Context))
4219 return false;
4221 if (Set.is_subset(InvalidContext))
4222 return false;
4224 return true;
/// Report the assumption @p Set via the optimization remark emitter and
/// update the per-kind statistics.
///
/// @returns True if the assumption should be added to the context; false if
///          it is trivial or (with minimal remarks) ineffective.
bool Scop::trackAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                           AssumptionSign Sign, BasicBlock *BB) {
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
    return false;

  // Do never emit trivial assumptions as they only clutter the output.
  if (!PollyRemarksMinimal) {
    isl::set Univ;
    if (Sign == AS_ASSUMPTION)
      Univ = isl::set::universe(Set.get_space());

    // An empty restriction or a universal assumption carries no information.
    bool IsTrivial = (Sign == AS_RESTRICTION && Set.is_empty()) ||
                     (Sign == AS_ASSUMPTION && Univ.is_equal(Set));

    if (IsTrivial)
      return false;
  }

  // Account for the assumption in the matching statistic counter.
  switch (Kind) {
  case ALIASING:
    AssumptionsAliasing++;
    break;
  case INBOUNDS:
    AssumptionsInbounds++;
    break;
  case WRAPPING:
    AssumptionsWrapping++;
    break;
  case UNSIGNED:
    AssumptionsUnsigned++;
    break;
  case COMPLEXITY:
    AssumptionsComplexity++;
    break;
  case PROFITABLE:
    AssumptionsUnprofitable++;
    break;
  case ERRORBLOCK:
    AssumptionsErrorBlock++;
    break;
  case INFINITELOOP:
    AssumptionsInfiniteLoop++;
    break;
  case INVARIANTLOAD:
    AssumptionsInvariantLoad++;
    break;
  case DELINEARIZATION:
    AssumptionsDelinearization++;
    break;
  }

  // Emit the remark at @p BB when available, otherwise at the region entry.
  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
  std::string Msg = toString(Kind) + Suffix + Set.to_str();
  if (BB)
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
                                        R.getEntry())
             << Msg);
  return true;
}
/// Add the assumption/restriction @p Set to the matching context after
/// simplifying it; trivial or untracked assumptions are dropped.
void Scop::addAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                         AssumptionSign Sign, BasicBlock *BB) {
  // Simplify the assumptions/restrictions first.
  Set = Set.gist_params(getContext());

  if (!trackAssumption(Kind, Set, Loc, Sign, BB))
    return;

  if (Sign == AS_ASSUMPTION)
    AssumedContext = AssumedContext.intersect(Set).coalesce();
  else
    InvalidContext = InvalidContext.unite(Set).coalesce();
}
/// Record an assumption for later processing by addRecordedAssumptions(),
/// e.g. while the statement domains are still under construction.
void Scop::recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                            AssumptionSign Sign, BasicBlock *BB) {
  assert((Set.is_params() || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}
/// Process all assumptions recorded via recordAssumption(), simplifying each
/// with the domain of the block it was recorded for where possible.
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    Assumption AS = RecordedAssumptions.pop_back_val();

    if (!AS.BB) {
      // A parameter-only assumption can be added directly.
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                 _            _____
    //   Dom => S  <==>  Dom v S  <==>  Dom - S
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
  }
}
/// Invalidate the SCoP by adding an always-false (empty set) assumption of
/// kind @p Kind.
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
  DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
  addAssumption(Kind, isl::set::empty(getParamSpace()), Loc, AS_ASSUMPTION, BB);
}

/// Return the parameter constraints under which the SCoP must not be run.
isl::set Scop::getInvalidContext() const { return InvalidContext; }
4351 void Scop::printContext(raw_ostream &OS) const {
4352 OS << "Context:\n";
4353 OS.indent(4) << Context << "\n";
4355 OS.indent(4) << "Assumed Context:\n";
4356 OS.indent(4) << AssumedContext << "\n";
4358 OS.indent(4) << "Invalid Context:\n";
4359 OS.indent(4) << InvalidContext << "\n";
4361 unsigned Dim = 0;
4362 for (const SCEV *Parameter : Parameters)
4363 OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
/// Print the min/max alias check groups generated for this SCoP.
void Scop::printAliasAssumptions(raw_ostream &OS) const {
  // A pair with read-only accesses contributes one group per read-only
  // access, otherwise it contributes a single group.
  int noOfGroups = 0;
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    if (Pair.second.size() == 0)
      noOfGroups += 1;
    else
      noOfGroups += Pair.second.size();
  }

  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
  if (MinMaxAliasGroups.empty()) {
    OS.indent(8) << "n/a\n";
    return;
  }

  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {

    // If the group has no read only accesses print the write accesses.
    if (Pair.second.empty()) {
      OS.indent(8) << "[[";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }

    // Otherwise print one line per read-only access, each paired with all
    // non-read-only accesses of the group.
    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
      OS.indent(8) << "[[";
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }
  }
}
/// Print all statements of the SCoP.
void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
  OS << "Statements {\n";

  for (const ScopStmt &Stmt : *this) {
    OS.indent(4);
    Stmt.print(OS, PrintInstructions);
  }

  OS.indent(4) << "}\n";
}

/// Print all arrays of the SCoP, first with symbolic sizes and then with the
/// bounds rendered as piecewise-affine expressions.
void Scop::printArrayInfo(raw_ostream &OS) const {
  OS << "Arrays {\n";

  for (auto &Array : arrays())
    Array->print(OS);

  OS.indent(4) << "}\n";

  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";

  for (auto &Array : arrays())
    Array->print(OS, /* SizeAsPwAff */ true);

  OS.indent(4) << "}\n";
}
/// Print a summary of the SCoP: function, region, invariant accesses,
/// contexts, arrays, alias groups and statements.
void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
  OS.indent(4) << "Region: " << getNameStr() << "\n";
  OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
  OS.indent(4) << "Invariant Accesses: {\n";
  for (const auto &IAClass : InvariantEquivClasses) {
    const auto &MAs = IAClass.InvariantAccesses;
    if (MAs.empty()) {
      // A class without member accesses only identifies the pointer.
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
    } else {
      MAs.front()->print(OS);
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
                    << "\n";
    }
  }
  OS.indent(4) << "}\n";
  printContext(OS.indent(4));
  printArrayInfo(OS.indent(4));
  printAliasAssumptions(OS);
  printStatements(OS.indent(4), PrintInstructions);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump the SCoP (including instructions) to stderr; intended for debugger
/// use.
LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
#endif
/// Return the isl context this SCoP builds its sets and maps in.
isl::ctx Scop::getIslCtx() const { return IslCtx.get(); }
/// Translate the SCEV @p E into a piecewise affine function in the context of
/// @p BB, invalidating the SCoP and returning a zero expression on failure.
__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
                                 bool NonNegative) {
  // First try to use the SCEVAffinator to generate a piecewise defined
  // affine function from @p E in the context of @p BB. If that task becomes
  // too complex the affinator might return a nullptr. In such a case we
  // invalidate the SCoP and return a dummy value. This way we do not need to
  // add error handling code to all users of this function.
  auto PWAC = Affinator.getPwAff(E, BB);
  if (PWAC.first) {
    // TODO: We could use a heuristic and either use:
    //         SCEVAffinator::takeNonNegativeAssumption
    //       or
    //         SCEVAffinator::interpretAsUnsigned
    //       to deal with unsigned or "NonNegative" SCEVs.
    if (NonNegative)
      Affinator.takeNonNegativeAssumption(PWAC);
    return PWAC;
  }

  // Fall back to a constant zero after marking the SCoP invalid.
  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
  invalidate(COMPLEXITY, DL, BB);
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
4484 isl::union_set Scop::getDomains() const {
4485 isl_space *EmptySpace = isl_space_params_alloc(getIslCtx().get(), 0);
4486 isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4488 for (const ScopStmt &Stmt : *this)
4489 Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());
4491 return isl::manage(Domain);
4494 isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4495 PWACtx PWAC = getPwAff(E, BB);
4496 return PWAC.first;
/// Collect the access relations of all memory accesses matching @p Predicate,
/// each restricted to the domain of its statement.
isl::union_map
Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
  isl::union_map Accesses = isl::union_map::empty(getParamSpace());

  for (ScopStmt &Stmt : *this) {
    for (MemoryAccess *MA : Stmt) {
      if (!Predicate(*MA))
        continue;

      isl::set Domain = Stmt.getDomain();
      isl::map AccessDomain = MA->getAccessRelation();
      AccessDomain = AccessDomain.intersect_domain(Domain);
      Accesses = Accesses.add_map(AccessDomain);
    }
  }

  return Accesses.coalesce();
}
4518 isl::union_map Scop::getMustWrites() {
4519 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4522 isl::union_map Scop::getMayWrites() {
4523 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4526 isl::union_map Scop::getWrites() {
4527 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4530 isl::union_map Scop::getReads() {
4531 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4534 isl::union_map Scop::getAccesses() {
4535 return getAccessesOfType([](MemoryAccess &MA) { return true; });
4538 isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
4539 return getAccessesOfType(
4540 [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
4543 // Check whether @p Node is an extension node.
4545 // @return true if @p Node is an extension node.
4546 isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
4547 if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
4548 return isl_bool_error;
4549 else
4550 return isl_bool_true;
4553 bool Scop::containsExtensionNode(isl::schedule Schedule) {
4554 return isl_schedule_foreach_schedule_node_top_down(
4555 Schedule.keep(), isNotExtNode, nullptr) == isl_stat_error;
4558 isl::union_map Scop::getSchedule() const {
4559 auto Tree = getScheduleTree();
4560 if (containsExtensionNode(Tree))
4561 return nullptr;
4563 return Tree.get_map();
4566 isl::schedule Scop::getScheduleTree() const {
4567 return Schedule.intersect_domain(getDomains());
4570 void Scop::setSchedule(isl::union_map NewSchedule) {
4571 auto S = isl::schedule::from_domain(getDomains());
4572 Schedule = S.insert_partial_schedule(
4573 isl::multi_union_pw_aff::from_union_map(NewSchedule));
4576 void Scop::setScheduleTree(isl::schedule NewSchedule) {
4577 Schedule = NewSchedule;
4580 bool Scop::restrictDomains(isl::union_set Domain) {
4581 bool Changed = false;
4582 for (ScopStmt &Stmt : *this) {
4583 isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
4584 isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);
4586 if (StmtDomain.is_subset(NewStmtDomain))
4587 continue;
4589 Changed = true;
4591 NewStmtDomain = NewStmtDomain.coalesce();
4593 if (NewStmtDomain.is_empty())
4594 Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
4595 else
4596 Stmt.restrictDomain(isl::set(NewStmtDomain));
4598 return Changed;
4601 ScalarEvolution *Scop::getSE() const { return SE; }
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// NOTE(review): the example above looks off by one w.r.t. the implementation:
// project_out_map keeps set dimensions [0, N) and drop_dims then removes all
// but the last remaining output, i.e. dimension N-1 — so N: 1 would map to
// [(i)], and N: 2 to [(j)]. Confirm against the caller (buildSchedule), which
// passes the current LoopStack depth.
//
// @param USet A union set describing the elements for which to generate a
//             mapping.
// @param N The dimension to map to.
// @returns A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  // For each set in the union, build a pw_multi_aff that projects the set
  // onto a single one of its dimensions, and accumulate it into Result.
  auto Lambda = [&Result, N](isl::set S) -> isl::stat {
    int Dim = S.dim(isl::dim::set);
    // Remove set dimensions [N, Dim), keeping [0, N) as outputs.
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    // Drop all but the last remaining output dimension.
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
    return isl::stat::ok;
  };

  isl::stat Res = USet.foreach_set(Lambda);
  (void)Res;

  assert(Res == isl::stat::ok);

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
4643 void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop,
4644 std::vector<Instruction *> Instructions, int Count) {
4645 assert(BB && "Unexpected nullptr!");
4646 Stmts.emplace_back(*this, *BB, SurroundingLoop, Instructions, Count);
4647 auto *Stmt = &Stmts.back();
4648 StmtMap[BB].push_back(Stmt);
4649 for (Instruction *Inst : Instructions) {
4650 assert(!InstStmtMap.count(Inst) &&
4651 "Unexpected statement corresponding to the instruction.");
4652 InstStmtMap[Inst] = Stmt;
4656 void Scop::addScopStmt(Region *R, Loop *SurroundingLoop,
4657 std::vector<Instruction *> Instructions) {
4658 assert(R && "Unexpected nullptr!");
4659 Stmts.emplace_back(*this, *R, SurroundingLoop, Instructions);
4660 auto *Stmt = &Stmts.back();
4662 for (Instruction *Inst : Instructions) {
4663 assert(!InstStmtMap.count(Inst) &&
4664 "Unexpected statement corresponding to the instruction.");
4665 InstStmtMap[Inst] = Stmt;
4668 for (BasicBlock *BB : R->blocks()) {
4669 StmtMap[BB].push_back(Stmt);
4670 if (BB == R->getEntry())
4671 continue;
4672 for (Instruction &Inst : *BB) {
4673 assert(!InstStmtMap.count(&Inst) &&
4674 "Unexpected statement corresponding to the instruction.");
4675 InstStmtMap[&Inst] = Stmt;
4680 ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
4681 isl::set Domain) {
4682 #ifndef NDEBUG
4683 isl::set SourceDomain = SourceRel.domain();
4684 isl::set TargetDomain = TargetRel.domain();
4685 assert(Domain.is_subset(TargetDomain) &&
4686 "Target access not defined for complete statement domain");
4687 assert(Domain.is_subset(SourceDomain) &&
4688 "Source access not defined for complete statement domain");
4689 #endif
4690 Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4691 CopyStmtsNum++;
4692 return &(Stmts.back());
4695 void Scop::buildSchedule(LoopInfo &LI) {
4696 Loop *L = getLoopSurroundingScop(*this, LI);
4697 LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
4698 buildSchedule(getRegion().getNode(), LoopStack, LI);
4699 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
4700 Schedule = LoopStack[0].Schedule;
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///          /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    // Nodes whose loop is not part of the SCoP are attributed to the loop
    // surrounding the SCoP instead.
    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        // @p RN lies outside the current loop: delay it until that loop has
        // been completely processed.
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}
void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    // Affine sub-regions are traversed recursively; non-affine ones are
    // modeled as a single statement and fall through to the code below.
    if (!isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack, LI);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  // Append the schedule of each statement of this node sequentially to the
  // schedule of the innermost loop on the stack.
  for (auto *Stmt : getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    ++LoopData;
    --Dimension;

    // The schedule may be null if the loop contained no statements.
    if (Schedule) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);
      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed up there from the LoopStack
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
4822 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
4823 auto StmtMapIt = StmtMap.find(BB);
4824 if (StmtMapIt == StmtMap.end())
4825 return {};
4826 return StmtMapIt->second;
4829 ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
4830 ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
4831 if (!StmtList.empty())
4832 return StmtList.back();
4833 return nullptr;
4836 ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
4837 if (RN->isSubRegion())
4838 return getStmtListFor(RN->getNodeAs<Region>());
4839 return getStmtListFor(RN->getNodeAs<BasicBlock>());
4842 ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
4843 return getStmtListFor(R->getEntry());
4846 int Scop::getRelativeLoopDepth(const Loop *L) const {
4847 if (!L || !R.contains(L))
4848 return -1;
4849 // outermostLoopInRegion always returns nullptr for top level regions
4850 if (R.isTopLevelRegion()) {
4851 // LoopInfo's depths start at 1, we start at 0
4852 return L->getLoopDepth() - 1;
4853 } else {
4854 Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
4855 assert(OuterLoop);
4856 return L->getLoopDepth() - OuterLoop->getLoopDepth();
4860 ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
4861 for (auto &SAI : arrays()) {
4862 if (SAI->getName() == BaseName)
4863 return SAI;
4865 return nullptr;
4868 void Scop::addAccessData(MemoryAccess *Access) {
4869 const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
4870 assert(SAI && "can only use after access relations have been constructed");
4872 if (Access->isOriginalValueKind() && Access->isRead())
4873 ValueUseAccs[SAI].push_back(Access);
4874 else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
4875 PHIIncomingAccs[SAI].push_back(Access);
4878 void Scop::removeAccessData(MemoryAccess *Access) {
4879 if (Access->isOriginalValueKind() && Access->isWrite()) {
4880 ValueDefAccs.erase(Access->getAccessValue());
4881 } else if (Access->isOriginalValueKind() && Access->isRead()) {
4882 auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
4883 std::remove(Uses.begin(), Uses.end(), Access);
4884 } else if (Access->isOriginalPHIKind() && Access->isRead()) {
4885 PHINode *PHI = cast<PHINode>(Access->getAccessInstruction());
4886 PHIReadAccs.erase(PHI);
4887 } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
4888 auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
4889 std::remove(Incomings.begin(), Incomings.end(), Access);
4893 MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
4894 assert(SAI->isValueKind());
4896 Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
4897 if (!Val)
4898 return nullptr;
4900 return ValueDefAccs.lookup(Val);
4903 ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
4904 assert(SAI->isValueKind());
4905 auto It = ValueUseAccs.find(SAI);
4906 if (It == ValueUseAccs.end())
4907 return {};
4908 return It->second;
4911 MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
4912 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
4914 if (SAI->isExitPHIKind())
4915 return nullptr;
4917 PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
4918 return PHIReadAccs.lookup(PHI);
4921 ArrayRef<MemoryAccess *> Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
4922 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
4923 auto It = PHIIncomingAccs.find(SAI);
4924 if (It == PHIIncomingAccs.end())
4925 return {};
4926 return It->second;
4929 bool Scop::isEscaping(Instruction *Inst) {
4930 assert(contains(Inst) && "The concept of escaping makes only sense for "
4931 "values defined inside the SCoP");
4933 for (Use &Use : Inst->uses()) {
4934 BasicBlock *UserBB = getUseBlock(Use);
4935 if (!contains(UserBB))
4936 return true;
4938 // When the SCoP region exit needs to be simplified, PHIs in the region exit
4939 // move to a new basic block such that its incoming blocks are not in the
4940 // SCoP anymore.
4941 if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
4942 isExit(cast<PHINode>(Use.getUser())->getParent()))
4943 return true;
4945 return false;
Scop::ScopStatistics Scop::getStatistics() const {
  ScopStatistics Result;
  // Statistics are only gathered in builds that can report them.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);

  int NumTotalLoops = LoopStat.NumLoops;
  Result.NumBoxedLoops = getBoxedLoops().size();
  // Every loop is either modeled affinely or boxed (over-approximated).
  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;

  for (const ScopStmt &Stmt : *this) {
    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
    bool IsInLoop = Stmt.getNumIterators() >= 1;
    for (MemoryAccess *MA : Stmt) {
      // Only write accesses are counted.
      if (!MA->isWrite())
        continue;

      if (MA->isLatestValueKind()) {
        Result.NumValueWrites += 1;
        if (IsInLoop)
          Result.NumValueWritesInLoops += 1;
      }

      if (MA->isLatestAnyPHIKind()) {
        Result.NumPHIWrites += 1;
        if (IsInLoop)
          Result.NumPHIWritesInLoops += 1;
      }

      // A write is "singleton" if, within the (parameter-constrained)
      // statement domain, it touches exactly one array element.
      isl::set AccSet =
          MA->getAccessRelation().intersect_domain(Domain).range();
      if (AccSet.is_singleton()) {
        Result.NumSingletonWrites += 1;
        if (IsInLoop)
          Result.NumSingletonWritesInLoops += 1;
      }
    }
  }
#endif
  return Result;
}
4989 raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
4990 scop.print(OS, PollyPrintInstructions);
4991 return OS;
4994 //===----------------------------------------------------------------------===//
4995 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
4996 AU.addRequired<LoopInfoWrapperPass>();
4997 AU.addRequired<RegionInfoPass>();
4998 AU.addRequired<DominatorTreeWrapperPass>();
4999 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5000 AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5001 AU.addRequired<AAResultsWrapperPass>();
5002 AU.addRequired<AssumptionCacheTracker>();
5003 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5004 AU.setPreservesAll();
5007 void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
5008 Scop::ScopStatistics ScopStats) {
5009 assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
5011 NumScops++;
5012 NumLoopsInScop += Stats.NumLoops;
5013 MaxNumLoopsInScop =
5014 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
5016 if (Stats.MaxDepth == 1)
5017 NumScopsDepthOne++;
5018 else if (Stats.MaxDepth == 2)
5019 NumScopsDepthTwo++;
5020 else if (Stats.MaxDepth == 3)
5021 NumScopsDepthThree++;
5022 else if (Stats.MaxDepth == 4)
5023 NumScopsDepthFour++;
5024 else if (Stats.MaxDepth == 5)
5025 NumScopsDepthFive++;
5026 else
5027 NumScopsDepthLarger++;
5029 NumAffineLoops += ScopStats.NumAffineLoops;
5030 NumBoxedLoops += ScopStats.NumBoxedLoops;
5032 NumValueWrites += ScopStats.NumValueWrites;
5033 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
5034 NumPHIWrites += ScopStats.NumPHIWrites;
5035 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
5036 NumSingletonWrites += ScopStats.NumSingletonWrites;
5037 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
5040 bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
5041 auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
5043 if (!SD.isMaxRegionInScop(*R))
5044 return false;
5046 Function *F = R->getEntry()->getParent();
5047 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5048 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5049 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
5050 auto const &DL = F->getParent()->getDataLayout();
5051 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5052 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
5053 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5055 ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
5056 S = SB.getScop(); // take ownership of scop object
5058 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
5059 if (S) {
5060 ScopDetection::LoopStats Stats =
5061 ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
5062 updateLoopCountStatistic(Stats, S->getStatistics());
5064 #endif
5066 return false;
5069 void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
5070 if (S)
5071 S->print(OS, PollyPrintInstructions);
5072 else
5073 OS << "Invalid Scop!\n";
char ScopInfoRegionPass::ID = 0;

// Factory for the legacy pass manager.
Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

// Legacy pass-manager registration, including all analyses this pass
// depends on (see getAnalysisUsage above).
INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)
5094 //===----------------------------------------------------------------------===//
/// Construct the ScopInfo result object, storing references to all required
/// analyses, and immediately build all SCoPs of the function via recompute().
ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
                   LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
                   AssumptionCache &AC, OptimizationRemarkEmitter &ORE)
    : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
  recompute();
}
void ScopInfo::recompute() {
  RegionToScopMap.clear();
  // Create a polyhedral description of the SCoP for every maximal valid
  // region of the function. (Plain '//' comments: '///' doc-comments do not
  // belong inside a function body.)
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
    std::unique_ptr<Scop> S = SB.getScop();
    // Building may fail; simply skip regions without a valid SCoP.
    if (!S)
      continue;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
#endif
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}
5126 bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
5127 FunctionAnalysisManager::Invalidator &Inv) {
5128 // Check whether the analysis, all analyses on functions have been preserved
5129 // or anything we're holding references to is being invalidated
5130 auto PAC = PA.getChecker<ScopInfoAnalysis>();
5131 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
5132 Inv.invalidate<ScopAnalysis>(F, PA) ||
5133 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
5134 Inv.invalidate<LoopAnalysis>(F, PA) ||
5135 Inv.invalidate<AAManager>(F, PA) ||
5136 Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
5137 Inv.invalidate<AssumptionAnalysis>(F, PA);
5140 AnalysisKey ScopInfoAnalysis::Key;
5142 ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
5143 FunctionAnalysisManager &FAM) {
5144 auto &SD = FAM.getResult<ScopAnalysis>(F);
5145 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
5146 auto &LI = FAM.getResult<LoopAnalysis>(F);
5147 auto &AA = FAM.getResult<AAManager>(F);
5148 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
5149 auto &AC = FAM.getResult<AssumptionAnalysis>(F);
5150 auto &DL = F.getParent()->getDataLayout();
5151 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5152 return {DL, SD, SE, LI, AA, DT, AC, ORE};
5155 PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
5156 FunctionAnalysisManager &FAM) {
5157 auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
5158 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5159 // order here to keep the output persistent
5160 for (auto &It : reverse(SI)) {
5161 if (It.second)
5162 It.second->print(Stream, PollyPrintInstructions);
5163 else
5164 Stream << "Invalid Scop!\n";
5166 return PreservedAnalyses::all();
5169 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
5170 AU.addRequired<LoopInfoWrapperPass>();
5171 AU.addRequired<RegionInfoPass>();
5172 AU.addRequired<DominatorTreeWrapperPass>();
5173 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5174 AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5175 AU.addRequired<AAResultsWrapperPass>();
5176 AU.addRequired<AssumptionCacheTracker>();
5177 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5178 AU.setPreservesAll();
5181 bool ScopInfoWrapperPass::runOnFunction(Function &F) {
5182 auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
5183 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5184 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5185 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
5186 auto const &DL = F.getParent()->getDataLayout();
5187 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5188 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5189 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5191 Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
5192 return false;
5195 void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
5196 for (auto &It : *Result) {
5197 if (It.second)
5198 It.second->print(OS, PollyPrintInstructions);
5199 else
5200 OS << "Invalid Scop!\n";
char ScopInfoWrapperPass::ID = 0;

// Factory for the legacy pass manager.
Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

// Legacy pass-manager registration, including all analyses this pass
// depends on (see getAnalysisUsage above).
INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)