//===- ScopInfo.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant load assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");

STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");

STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
STATISTIC(
    NumValueWritesInLoops,
    "Number of scalar value writes nested in affine loops after ScopInfo");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
STATISTIC(NumPHIWritesInLoops,
          "Number of scalar phi writes nested in affine loops after ScopInfo");
STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
STATISTIC(NumSingletonWritesInLoops,
          "Number of singleton writes nested in affine loops after ScopInfo");

// The maximal number of basic sets we allow to be created during domain
// construction. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctsInDomain = 20;

// The number of disjuncts in the context after which we stop adding more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjuncts when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses "
             "to build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (do not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do not gist simplify sets accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers passed to"
        " functions are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

bool polly::UseInstructionNames;

static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyPrintInstructions(
    "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
    cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return Prev.sequence(Succ);
}

static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                    int dim, isl::dim type) {
  isl::val V;
  isl::ctx Ctx = S.get_ctx();

  // The upper and lower bounds for a parameter value are derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
  S = S.lower_bound_val(type, dim, V);
  V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
  S = S.upper_bound_val(type, dim, V);

  if (Range.isFullSet())
    return S;

  if (S.n_basic_set() > MaxDisjunctsInContext)
    return S;

  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
  if (Range.isSignWrappedSet()) {
    V = valFromAPInt(Ctx.get(), Range.getLower(), true);
    isl::set SLB = S.lower_bound_val(type, dim, V);

    V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
    V = V.sub_ui(1);
    isl::set SUB = S.upper_bound_val(type, dim, V);
    S = SLB.unite(SUB);
  }

  return S;
}
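
// For example (illustrative): a sign-wrapped i8 range [120, -120) first yields
// the full signed bounds -128 <= x <= 127; the refinement above then narrows
// the set to { x >= 120 } union { x <= -121 }, exactly the values covered by
// the wrapped range.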

static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             MemoryKind::Array);
}

ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx,
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
  std::string BasePtrName =
      BaseName ? BaseName
               : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
                                      Kind == MemoryKind::PHI ? "__phi" : "",
                                      UseInstructionNames);
  Id = isl::id::alloc(Ctx, BasePtrName, this);

  updateSizes(Sizes);

  if (!BasePtr || Kind != MemoryKind::Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}

ScopArrayInfo::~ScopArrayInfo() = default;

isl::space ScopArrayInfo::getSpace() const {
  auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions());
  Space = Space.set_tuple_id(isl::dim::set, Id);
  return Space;
}

bool ScopArrayInfo::isReadOnly() {
  isl::union_set WriteSet = S.getWrites().range();
  isl::space Space = getSpace();
  WriteSet = WriteSet.extract_set(Space);

  return bool(WriteSet.is_empty());
}

bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
  if (Array->getElementType() != getElementType())
    return false;

  if (Array->getNumberOfDimensions() != getNumberOfDimensions())
    return false;

  for (unsigned i = 0; i < getNumberOfDimensions(); i++)
    if (Array->getDimensionSize(i) != getDimensionSize(i))
      return false;

  return true;
}

void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}

/// Make the ScopArrayInfo model a Fortran Array
void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
  assert(FAD && "got invalid Fortran array descriptor");
  if (this->FAD) {
    assert(this->FAD == FAD &&
           "receiving different array descriptors for same array");
    return;
  }

  assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
  assert(!this->FAD);
  this->FAD = FAD;

  isl::space Space(S.getIslCtx(), 1, 0);

  std::string param_name = getName();
  param_name += "_fortranarr_size";
  isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this);

  Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
  isl::pw_aff PwAff =
      isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);

  DimensionSizesPw[0] = PwAff;
}

bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
                                bool CheckConsistency) {
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
  int ExtraDimsNew = NewSizes.size() - SharedDims;
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;

  if (CheckConsistency) {
    for (int i = 0; i < SharedDims; i++) {
      auto *NewSize = NewSizes[i + ExtraDimsNew];
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
      if (NewSize && KnownSize && NewSize != KnownSize)
        return false;
    }

    if (DimensionSizes.size() >= NewSizes.size())
      return true;
  }

  DimensionSizes.clear();
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
                        NewSizes.end());
  DimensionSizesPw.clear();
  for (const SCEV *Expr : DimensionSizes) {
    if (!Expr) {
      DimensionSizesPw.push_back(nullptr);
      continue;
    }
    isl::pw_aff Size = S.getPwAffOnly(Expr);
    DimensionSizesPw.push_back(Size);
  }
  return true;
}

std::string ScopArrayInfo::getName() const { return Id.get_name(); }

int ScopArrayInfo::getElemSizeInBytes() const {
  return DL.getTypeAllocSize(ElementType);
}

isl::id ScopArrayInfo::getBasePtrId() const { return Id; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); }
#endif

void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // If this is a Fortran array, then we can print the outermost dimension
  // as an isl_pw_aff even though there is no SCEV information.
  bool IsOutermostSizeKnown = SizeAsPwAff && FAD;

  if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
      !getDimensionSize(0)) {
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      isl::pw_aff Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}

const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
  isl::id Id = PMA.get_tuple_id(isl::dim::out);
  assert(!Id.is_null() && "Output dimension didn't have an ID");
  return getFromId(Id);
}

const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
  void *User = Id.get_user();
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  return SAI;
}

void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::ctx Ctx = ArraySpace.get_ctx();
  unsigned DimsArray = SAI->getNumberOfDimensions();

  isl::multi_aff DivModAff = isl::multi_aff::identity(
      ArraySpace.map_from_domain_and_range(ArraySpace));
  isl::local_space LArraySpace = isl::local_space(ArraySpace);

  // Begin with last dimension, to iteratively carry into higher dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    // This transformation is not applicable to dimensions of size zero.
    if (DimSize->isZero())
      continue;

    isl::val DimSizeVal =
        valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
    isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
    isl::aff PrevVar =
        isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    isl::aff Modulo = Var.mod(DimSizeVal);
    Modulo = Modulo.pullback(DivModAff);

    // Compute: floor(index / size)
    isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
    Divide = Divide.floor();
    Divide = Divide.add(PrevVar);
    Divide = Divide.pullback(DivModAff);

    // Apply Modulo and Divide.
    DivModAff = DivModAff.set_aff(i, Modulo);
    DivModAff = DivModAff.set_aff(i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  isl::map Relation = AccessRelation;
  Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
  Relation = Relation.detect_equalities();
  AccessRelation = Relation;
}
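
// For example (illustrative): for an array A[*][8], the mapping built above
// rewrites an access to index (0, 13) as (1, 5), carrying floor(13 / 8) = 1
// into the outer dimension and keeping 13 mod 8 = 5 in the inner one.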

void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::space AccessSpace = AccessRelation.get_space().range();
  isl::ctx Ctx = ArraySpace.get_ctx();

  auto DimsArray = ArraySpace.dim(isl::dim::set);
  auto DimsAccess = AccessSpace.dim(isl::dim::set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  isl::map Map = isl::map::from_domain_and_range(
      isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = Map.fix_si(isl::dim::out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);

  AccessRelation = AccessRelation.apply_range(Map);

  // For the non-delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride-one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not divisible
  // by the accesses' element size, we will have chosen a smaller ArrayElemSize
  // that divides the offsets of all accesses to this base pointer.
  if (DimsAccess == 1) {
    isl::val V = isl::val(Ctx, ArrayElemSize);
    AccessRelation = AccessRelation.floordiv_val(V);
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimension's indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; the effect on unit tests is to make
  // index expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3}
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    isl::map Map = isl::map::from_domain_and_range(
        isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

    isl::constraint C;
    isl::local_space LS;

    LS = isl::local_space(Map.get_space());
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_constant_val(isl::val(Ctx, Num - 1));
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
    Map = Map.add_constraint(C);

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
    C = C.set_constant_val(isl::val(Ctx, 0));
    Map = Map.add_constraint(C);
    AccessRelation = AccessRelation.apply_range(Map);
  }
}
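
// For example (illustrative): a one-dimensional byte-offset access
// { Stmt[i] -> A[4i] } to an array with a 4-byte canonical element type is
// divided down to { Stmt[i] -> A[i] }, making the stride-one nature explicit.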

const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
}

const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
  isl::id ArrayId = getArrayId();
  void *User = ArrayId.get_user();
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  return SAI;
}

const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
  isl::id ArrayId = getLatestArrayId();
  void *User = ArrayId.get_user();
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  return SAI;
}

isl::id MemoryAccess::getOriginalArrayId() const {
  return AccessRelation.get_tuple_id(isl::dim::out);
}

isl::id MemoryAccess::getLatestArrayId() const {
  if (!hasNewAccessRelation())
    return getOriginalArrayId();
  return NewAccessRelation.get_tuple_id(isl::dim::out);
}

isl::map MemoryAccess::getAddressFunction() const {
  return getAccessRelation().lexmin();
}

isl::pw_multi_aff
MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
  isl::map Schedule, ScheduledAccRel;
  isl::union_set UDomain;

  UDomain = getStatement()->getDomain();
  USchedule = USchedule.intersect_domain(UDomain);
  Schedule = isl::map::from_union_map(USchedule);
  ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
  return isl::pw_multi_aff::from_map(ScheduledAccRel);
}

isl::map MemoryAccess::getOriginalAccessRelation() const {
  return AccessRelation;
}

std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return AccessRelation.to_str();
}

isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
  return AccessRelation.get_space();
}

isl::map MemoryAccess::getNewAccessRelation() const {
  return NewAccessRelation;
}

std::string MemoryAccess::getNewAccessRelationStr() const {
  return NewAccessRelation.to_str();
}

std::string MemoryAccess::getAccessRelationStr() const {
  return getAccessRelation().to_str();
}

isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
  Space = Space.align_params(Statement->getDomainSpace());

  return isl::basic_map::from_domain_and_range(
      isl::basic_set::universe(Statement->getDomainSpace()),
      isl::basic_set::universe(Space));
}

// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out-of-bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is at least zero and strictly smaller than the
// size of the corresponding dimension). The only exception is the outermost
// dimension for which we do not need to assume any upper bound. At this point
// we formalize this assumption to ensure that at code generation time the
// relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out-of-bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out-of-bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure such
// statement instances are never executed.
void MemoryAccess::assumeNoOutOfBound() {
  if (PollyIgnoreInbounds)
    return;
  auto *SAI = getScopArrayInfo();
  isl::space Space = getOriginalAccessRelationSpace().range();
  isl::set Outside = isl::set::empty(Space);
  for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
    isl::local_space LS(Space);
    isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
    isl::pw_aff Zero = isl::pw_aff(LS);

    isl::set DimOutside = Var.lt_set(Zero);
    isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
    SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
    SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
    DimOutside = DimOutside.unite(SizeE.le_set(Var));

    Outside = Outside.unite(DimOutside);
  }

  Outside = Outside.apply(getAccessRelation().reverse());
  Outside = Outside.intersect(Statement->getDomain());
  Outside = Outside.params();

  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always safe, but may make the resulting run-time check
  // bail out more often than strictly necessary.
  Outside = Outside.remove_divs();
  Outside = Outside.complement();
  const auto &Loc = getAccessInstruction()
                        ? getAccessInstruction()->getDebugLoc()
                        : DebugLoc();
  if (!PollyPreciseInbounds)
    Outside = Outside.gist_params(Statement->getDomain().params());
  Statement->getParent()->recordAssumption(INBOUNDS, Outside, Loc,
                                           AS_ASSUMPTION);
}
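
// For example (illustrative): for a delinearized access A[i][j] to an array
// A[*][m], the Outside set built above is { A[o0, o1] : o1 < 0 or o1 >= m };
// applying the reverse access relation, intersecting with the domain, and
// complementing the parameter set yields the assumption that rules out all
// out-of-bound instances.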

void MemoryAccess::buildMemIntrinsicAccessRelation() {
  assert(isMemoryIntrinsic());
  assert(Subscripts.size() == 2 && Sizes.size() == 1);

  isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
  isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);

  isl::map LengthMap;
  if (Subscripts[1] == nullptr) {
    LengthMap = isl::map::universe(SubscriptMap.get_space());
  } else {
    isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
    LengthMap = isl::map::from_pw_aff(LengthPWA);
    isl::space RangeSpace = LengthMap.get_space().range();
    LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
  }
  LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
  LengthMap = LengthMap.align_params(SubscriptMap.get_space());
  SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
  LengthMap = LengthMap.sum(SubscriptMap);
  AccessRelation =
      LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
}
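
// For example (illustrative): a memset covering N elements starting at
// element offset S is modeled as { Stmt[...] -> MemRef[o] : S <= o < S + N };
// if the length is unknown, the universe map conservatively covers every
// offset o >= S.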

void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
  ScalarEvolution *SE = Statement->getParent()->getSE();

  auto MAI = MemAccInst(getAccessInstruction());
  if (isa<MemIntrinsic>(MAI))
    return;

  Value *Ptr = MAI.getPointerOperand();
  if (!Ptr || !SE->isSCEVable(Ptr->getType()))
    return;

  auto *PtrSCEV = SE->getSCEV(Ptr);
  if (isa<SCEVCouldNotCompute>(PtrSCEV))
    return;

  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
  if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
    PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);

  const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
  if (Range.isFullSet())
    return;

  if (Range.isUpperWrapped() || Range.isSignWrappedSet())
    return;

  bool isWrapping = Range.isSignWrappedSet();

  unsigned BW = Range.getBitWidth();
  const auto One = APInt(BW, 1);
  const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
  const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();

  auto Min = LB.sdiv(APInt(BW, ElementSize));
  auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;

  assert(Min.sle(Max) && "Minimum expected to be less than or equal to max");

  isl::map Relation = AccessRelation;
  isl::set AccessRange = Relation.range();
  AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
                                    isl::dim::set);
  AccessRelation = Relation.intersect_range(AccessRange);
}
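
// For example (illustrative): if ScalarEvolution bounds the byte offset to the
// signed range [0, 397) and ElementSize is 4, then Min = 0 and Max = 100, so
// the access range is restricted to the element indices 0 to 99.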

void MemoryAccess::foldAccessRelation() {
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    isl::id ParamId = SpaceSize.get_dim_id(isl::dim::param, 0);

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      NewAccessRelation.n_basic_map() > AccessRelation.n_basic_map()) {
  } else {
    AccessRelation = NewAccessRelation;
  }
}
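
// For example (illustrative): with a parametric inner dimension size m, the
// fold above rewrites the subscript pair (i, -1), as produced by an access
// like A[i][j - 1] at j = 0, into (i - 1, m - 1), borrowing one row from the
// next-outer dimension.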

/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
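
// For example (illustrative): with Size = 4, the expression (4 * i + 8) is
// divisible because both add operands are, whereas (4 * i + 2) is not; for
// the product (4 * i), the single divisible factor 4 suffices.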

void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference whether an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}

MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}

MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}

MemoryAccess::~MemoryAccess() = default;

void MemoryAccess::realignParams() {
  isl::set Ctx = Statement->getParent()->getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  AccessRelation = AccessRelation.gist_params(Ctx);
}

const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}

isl::id MemoryAccess::getId() const { return Id; }

raw_ostream &polly::operator<<(raw_ostream &OS,
                               MemoryAccess::ReductionType RT) {
  if (RT == MemoryAccess::RT_NONE)
    OS << "NONE";
  else
    OS << MemoryAccess::getReductionOperatorStr(RT);
  return OS;
}

void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }

void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif

isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(PWAC.second);
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return PWAC.first;
}

// Create a map on the space of the provided set domain that maps one element
// of the set domain to another element of the same domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strictly smaller
// than the last dimension of the output.
//
// getEqualAndLarger(set[i0, i1, ..., iX]):
//
//  set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//    : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strictly smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
  return Map;
}

isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  NextScatt = NextScatt.lexmin();

  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}
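
// For example (illustrative): for the access { Stmt[i] -> A[2i] } under the
// schedule { Stmt[i] -> [i] }, consecutive schedule points touch elements 2i
// and 2i + 2, so the returned stride set is { [2] } and isStrideOne is false.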

bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
  isl::set Stride, StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX = isl::set::universe(Stride.get_space());
  for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
    StrideX = StrideX.fix_si(isl::dim::set, i, 0);
  StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
                           StrideWidth);
  IsStrideX = Stride.is_subset(StrideX);

  return IsStrideX;
}

bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}

bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}

void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}

void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain =
        StmtDomain.intersect_params(getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to the number of dimensions of
  // the accessed array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}

bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  return !StmtDom.is_subset(AccDom);
}

//===----------------------------------------------------------------------===//

isl::map ScopStmt::getSchedule() const {
  isl::set Domain = getDomain();
  if (Domain.is_empty())
    return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
  auto Schedule = getParent()->getSchedule();
  if (!Schedule)
    return nullptr;
  Schedule = Schedule.intersect_domain(isl::union_set(Domain));
  if (Schedule.is_empty())
    return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
  isl::map M = isl::map::from_union_map(Schedule);
  M = M.coalesce();
  M = M.gist_domain(Domain);
  M = M.coalesce();
  return M;
}

void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}

void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}

void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  isl::set Ctx = Parent.getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  Domain = Domain.gist_params(Ctx);
}

/// Collect the bounded parts of @p S.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u = 0, e = S.n_dim(); u < e; u++)
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = S.n_dim();
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}
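
// For example (illustrative): for S = { [i, j] : 0 <= i and 0 <= j <= n } and
// Dim = 1, dimension j is bounded by n, so all of S ends up in the bounded
// part; without the j <= n constraint it would land in the unbounded part.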

/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides.
/// @param BB               The BB for which the isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build the condition sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors.
bool buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                        __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take the NonNeg assumption on TestVal
  // as it might have the MSB (sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take the NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
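
// For example (illustrative): for the condition (i u< N), TestVal is i and
// UpperBound is N (assumed non-negative), so the consequence set built above
// is { 0 <= i and i < N }, which correctly excludes values of i whose sign
// bit is set.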
1495 /// Build the conditions sets for the branch condition @p Condition in
1496 /// the @p Domain.
1498 /// This will fill @p ConditionSets with the conditions under which control
1499 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1500 /// have as many elements as @p TI has successors. If @p TI is nullptr the
1501 /// context under which @p Condition is true/false will be returned as the
1502 /// new elements of @p ConditionSets.
1503 bool buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
1504 Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
1505 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1506 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1507 ScalarEvolution &SE = *S.getSE();
1508 isl_set *ConsequenceCondSet = nullptr;
1510 if (auto Load = dyn_cast<LoadInst>(Condition)) {
1511 const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
1512 const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
1513 bool NonNeg = false;
1514 isl_pw_aff *LHS = getPwAff(S, BB, InvalidDomainMap, LHSSCEV, NonNeg);
1515 isl_pw_aff *RHS = getPwAff(S, BB, InvalidDomainMap, RHSSCEV, NonNeg);
1516 ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
1517 isl::manage(RHS))
1518 .release();
1519 } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
1520 auto *Unique = dyn_cast<ConstantInt>(
1521 getUniqueNonErrorValue(PHI, &S.getRegion(), *S.getLI(), *S.getDT()));
1523 if (Unique->isZero())
1524 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
1525 else
1526 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
1527 } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
1528 if (CCond->isZero())
1529 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
1530 else
1531 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
1532 } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
1533 auto Opcode = BinOp->getOpcode();
1534 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
1536 bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
1537 InvalidDomainMap, ConditionSets) &&
1538 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
1539 InvalidDomainMap, ConditionSets);
1540 if (!Valid) {
1541 while (!ConditionSets.empty())
1542 isl_set_free(ConditionSets.pop_back_val());
1543 return false;
1546 isl_set_free(ConditionSets.pop_back_val());
1547 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
1548 isl_set_free(ConditionSets.pop_back_val());
1549 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
1551 if (Opcode == Instruction::And)
1552 ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
1553 else
1554 ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
1555 } else {
1556 auto *ICond = dyn_cast<ICmpInst>(Condition);
1557 assert(ICond &&
1558 "Condition of exiting branch was neither constant nor ICmp!");
1560 LoopInfo &LI = *S.getLI();
1561 DominatorTree &DT = *S.getDT();
1562 Region &R = S.getRegion();
1564 isl_pw_aff *LHS, *RHS;
1565 // For unsigned comparisons we assume the sign bit of neither operand
1566 // to be set. Under this assumption the comparison is equivalent to a
1567 // signed comparison.
1568 bool NonNeg = ICond->isUnsigned();
1569 const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
1570 *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
1572 LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
1573 RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);
1575 switch (ICond->getPredicate()) {
1576 case ICmpInst::ICMP_ULT:
1577 ConsequenceCondSet =
1578 buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
1579 RightOperand, InvalidDomainMap, true);
1580 break;
1581 case ICmpInst::ICMP_ULE:
1582 ConsequenceCondSet =
1583 buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
1584 RightOperand, InvalidDomainMap, false);
1585 break;
1586 case ICmpInst::ICMP_UGT:
1587 ConsequenceCondSet =
1588 buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
1589 LeftOperand, InvalidDomainMap, true);
1590 break;
1591 case ICmpInst::ICMP_UGE:
1592 ConsequenceCondSet =
1593 buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
1594 LeftOperand, InvalidDomainMap, false);
1595 break;
1596 default:
1597 LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
1598 RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
1599 ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
1600 isl::manage(LHS), isl::manage(RHS))
1601 .release();
1602 break;
1606 // If no terminator was given we are only looking for parameter constraints
1607 // under which @p Condition is true/false.
1608 if (!TI)
1609 ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
1610 assert(ConsequenceCondSet);
1611 ConsequenceCondSet = isl_set_coalesce(
1612 isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
1614 isl_set *AlternativeCondSet = nullptr;
1615 bool TooComplex =
1616 isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;
1618 if (!TooComplex) {
1619 AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
1620 isl_set_copy(ConsequenceCondSet));
1621 TooComplex =
1622 isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
1625 if (TooComplex) {
1626 S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
1627 TI ? TI->getParent() : nullptr /* BasicBlock */);
1628 isl_set_free(AlternativeCondSet);
1629 isl_set_free(ConsequenceCondSet);
1630 return false;
1633 ConditionSets.push_back(ConsequenceCondSet);
1634 ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
1636 return true;
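// A worked example (hypothetical loop, not from the original source): for a
// branch `if (i <= 10)` in a block with domain { Stmt[i] : 0 <= i < 100 },
// the two sets pushed above are
//
//   ConditionSets[0] = { Stmt[i] : 0 <= i <= 10 }   (consequence)
//   ConditionSets[1] = { Stmt[i] : 11 <= i < 100 }  (alternative)
//
// where the alternative set is the domain minus the consequence set.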
1639 /// Build the condition sets for the terminator @p TI in the @p Domain.
1641 /// This will fill @p ConditionSets with the conditions under which control
1642 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1643 /// have as many elements as @p TI has successors.
1644 bool buildConditionSets(Scop &S, BasicBlock *BB, Instruction *TI, Loop *L,
1645 __isl_keep isl_set *Domain,
1646 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
1647 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1648 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
1649 return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
1650 ConditionSets);
1652 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
1654 if (TI->getNumSuccessors() == 1) {
1655 ConditionSets.push_back(isl_set_copy(Domain));
1656 return true;
1659 Value *Condition = getConditionFromTerminator(TI);
1660 assert(Condition && "No condition for Terminator");
1662 return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
1663 ConditionSets);
1666 ScopStmt::ScopStmt(Scop &parent, Region &R, StringRef Name,
1667 Loop *SurroundingLoop,
1668 std::vector<Instruction *> EntryBlockInstructions)
1669 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
1670 Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
1671 Instructions(EntryBlockInstructions) {}
1673 ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, StringRef Name,
1674 Loop *SurroundingLoop,
1675 std::vector<Instruction *> Instructions)
1676 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1677 Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
1678 Instructions(Instructions) {}
1680 ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
1681 isl::set NewDomain)
1682 : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
1683 Build(nullptr) {
1684 BaseName = getIslCompatibleName("CopyStmt_", "",
1685 std::to_string(parent.getCopyStmtsNum()));
1686 isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
1687 Domain = Domain.set_tuple_id(Id);
1688 TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
1689 auto *Access =
1690 new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
1691 parent.addAccessFunction(Access);
1692 addAccess(Access);
1693 SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
1694 Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
1695 parent.addAccessFunction(Access);
1696 addAccess(Access);
1699 ScopStmt::~ScopStmt() = default;
1701 std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }
1703 std::string ScopStmt::getScheduleStr() const {
1704 auto *S = getSchedule().release();
1705 if (!S)
1706 return {};
1707 auto Str = stringFromIslObj(S);
1708 isl_map_free(S);
1709 return Str;
1712 void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }
1714 BasicBlock *ScopStmt::getEntryBlock() const {
1715 if (isBlockStmt())
1716 return getBasicBlock();
1717 return getRegion()->getEntry();
1720 unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }
1722 const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }
1724 Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
1725 return NestLoops[Dimension];
1728 isl::ctx ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
1730 isl::set ScopStmt::getDomain() const { return Domain; }
1732 isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }
1734 isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
1736 void ScopStmt::printInstructions(raw_ostream &OS) const {
1737 OS << "Instructions {\n";
1739 for (Instruction *Inst : Instructions)
1740 OS.indent(16) << *Inst << "\n";
1742 OS.indent(12) << "}\n";
1745 void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
1746 OS << "\t" << getBaseName() << "\n";
1747 OS.indent(12) << "Domain :=\n";
1749 if (Domain) {
1750 OS.indent(16) << getDomainStr() << ";\n";
1751 } else
1752 OS.indent(16) << "n/a\n";
1754 OS.indent(12) << "Schedule :=\n";
1756 if (Domain) {
1757 OS.indent(16) << getScheduleStr() << ";\n";
1758 } else
1759 OS.indent(16) << "n/a\n";
1761 for (MemoryAccess *Access : MemAccs)
1762 Access->print(OS);
1764 if (PrintInstructions)
1765 printInstructions(OS.indent(12));
1768 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1769 LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
1770 #endif
1772 void ScopStmt::removeAccessData(MemoryAccess *MA) {
1773 if (MA->isRead() && MA->isOriginalValueKind()) {
1774 bool Found = ValueReads.erase(MA->getAccessValue());
1775 (void)Found;
1776 assert(Found && "Expected access data not found");
1778 if (MA->isWrite() && MA->isOriginalValueKind()) {
1779 bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
1780 (void)Found;
1781 assert(Found && "Expected access data not found");
1783 if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
1784 bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
1785 (void)Found;
1786 assert(Found && "Expected access data not found");
1788 if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
1789 bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
1790 (void)Found;
1791 assert(Found && "Expected access data not found");
1795 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
1796 // Remove the memory access from this statement together with all scalar
1797 // accesses that were caused by it. MemoryKind::Value READs have no access
1798 // instruction, hence would not be removed by this function. However, this
1799 // function is only used for invariant LoadInst accesses, whose arguments are
1800 // always affine and hence synthesizable, so there are no MemoryKind::Value
1801 // READ accesses to be removed.
1802 auto Predicate = [&](MemoryAccess *Acc) {
1803 return Acc->getAccessInstruction() == MA->getAccessInstruction();
1805 for (auto *MA : MemAccs) {
1806 if (Predicate(MA)) {
1807 removeAccessData(MA);
1808 Parent.removeAccessData(MA);
1811 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
1812 MemAccs.end());
1813 InstructionToAccess.erase(MA->getAccessInstruction());
1816 void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA, bool AfterHoisting) {
1817 if (AfterHoisting) {
1818 auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
1819 assert(MAIt != MemAccs.end());
1820 MemAccs.erase(MAIt);
1822 removeAccessData(MA);
1823 Parent.removeAccessData(MA);
1826 auto It = InstructionToAccess.find(MA->getAccessInstruction());
1827 if (It != InstructionToAccess.end()) {
1828 It->second.remove(MA);
1829 if (It->second.empty())
1830 InstructionToAccess.erase(MA->getAccessInstruction());
1834 MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
1835 MemoryAccess *Access = lookupInputAccessOf(V);
1836 if (Access)
1837 return Access;
1839 ScopArrayInfo *SAI =
1840 Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
1841 Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
1842 true, {}, {}, V, MemoryKind::Value);
1843 Parent.addAccessFunction(Access);
1844 Access->buildAccessRelation(SAI);
1845 addAccess(Access);
1846 Parent.addAccessData(Access);
1847 return Access;
1850 raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
1851 S.print(OS, PollyPrintInstructions);
1852 return OS;
1855 //===----------------------------------------------------------------------===//
1856 /// Scop class implementation
1858 void Scop::setContext(isl::set NewContext) {
1859 Context = NewContext.align_params(Context.get_space());
1862 namespace {
1864 /// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1865 struct SCEVSensitiveParameterRewriter
1866 : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
1867 const ValueToValueMap &VMap;
1869 public:
1870 SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
1871 ScalarEvolution &SE)
1872 : SCEVRewriteVisitor(SE), VMap(VMap) {}
1874 static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
1875 const ValueToValueMap &VMap) {
1876 SCEVSensitiveParameterRewriter SSPR(VMap, SE);
1877 return SSPR.visit(E);
1880 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
1881 auto *Start = visit(E->getStart());
1882 auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
1883 visit(E->getStepRecurrence(SE)),
1884 E->getLoop(), SCEV::FlagAnyWrap);
1885 return SE.getAddExpr(Start, AddRec);
1888 const SCEV *visitUnknown(const SCEVUnknown *E) {
1889 if (auto *NewValue = VMap.lookup(E->getValue()))
1890 return SE.getUnknown(NewValue);
1891 return E;
1895 /// Check whether we should remap a SCEV expression.
1896 struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
1897 const ValueToValueMap &VMap;
1898 bool FoundInside = false;
1899 const Scop *S;
1901 public:
1902 SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
1903 const Scop *S)
1904 : SCEVTraversal(*this), VMap(VMap), S(S) {}
1906 static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
1907 const ValueToValueMap &VMap, const Scop *S) {
1908 SCEVFindInsideScop SFIS(VMap, SE, S);
1909 SFIS.visitAll(E);
1910 return SFIS.FoundInside;
1913 bool follow(const SCEV *E) {
1914 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
1915 FoundInside |= S->getRegion().contains(AddRec->getLoop());
1916 } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
1917 if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
1918 FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
1920 return !FoundInside;
1923 bool isDone() { return FoundInside; }
1925 } // end anonymous namespace
1927 const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
1928 // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
1929 // doesn't like addition between an AddRec and an expression that
1930 // doesn't have a dominance relationship with it.)
1931 if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
1932 return E;
1934 // Rewrite SCEV.
1935 return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
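// Sketch of the effect (hypothetical IR names): if %a and %b are required
// invariant loads of the same address, InvEquivClassVMap maps %b to the class
// representative %a, so a SCEV mentioning %b is rewritten to mention %a and
// both loads give rise to the same parameter.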
1938 // This table of function names is used to translate parameter names into
1939 // more human-readable names. This makes it easier to interpret Polly analysis
1940 // results.
1941 StringMap<std::string> KnownNames = {
1942 {"_Z13get_global_idj", "global_id"},
1943 {"_Z12get_local_idj", "local_id"},
1944 {"_Z15get_global_sizej", "global_size"},
1945 {"_Z14get_local_sizej", "local_size"},
1946 {"_Z12get_work_dimv", "work_dim"},
1947 {"_Z17get_global_offsetj", "global_offset"},
1948 {"_Z12get_group_idj", "group_id"},
1949 {"_Z14get_num_groupsj", "num_groups"},
1952 static std::string getCallParamName(CallInst *Call) {
1953 std::string Result;
1954 raw_string_ostream OS(Result);
1955 std::string Name = Call->getCalledFunction()->getName();
1957 auto Iterator = KnownNames.find(Name);
1958 if (Iterator != KnownNames.end())
1959 Name = "__" + Iterator->getValue();
1960 OS << Name;
1961 for (auto &Operand : Call->arg_operands()) {
1962 ConstantInt *Op = cast<ConstantInt>(&Operand);
1963 OS << "_" << Op->getValue();
1965 OS.flush();
1966 return Result;
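// A minimal sketch of the mapping above (hypothetical call): for
//
//   %id = call i64 @_Z13get_global_idj(i32 0)
//
// the table entry yields "__global_id" and the constant argument is appended,
// giving the parameter name "__global_id_0".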
1969 void Scop::createParameterId(const SCEV *Parameter) {
1970 assert(Parameters.count(Parameter));
1971 assert(!ParameterIds.count(Parameter));
1973 std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);
1975 if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
1976 Value *Val = ValueParameter->getValue();
1977 CallInst *Call = dyn_cast<CallInst>(Val);
1979 if (Call && isConstCall(Call)) {
1980 ParameterName = getCallParamName(Call);
1981 } else if (UseInstructionNames) {
1982 // If this parameter references a specific Value and this value has a name,
1983 // we use this name, as it is likely to be unique and more useful than just
1984 // a number.
1985 if (Val->hasName())
1986 ParameterName = Val->getName();
1987 else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
1988 auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
1989 if (LoadOrigin->hasName()) {
1990 ParameterName += "_loaded_from_";
1991 ParameterName +=
1992 LI->getPointerOperand()->stripInBoundsOffsets()->getName();
1997 ParameterName = getIslCompatibleName("", ParameterName, "");
2000 isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
2001 const_cast<void *>((const void *)Parameter));
2002 ParameterIds[Parameter] = Id;
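// Illustrative examples of the resulting names (hypothetical values):
//   - an unnamed fourth parameter            -> "p_3"
//   - a named value %N (UseInstructionNames) -> "N"
//   - an unnamed load from pointer %bnd      -> "p_0_loaded_from_bnd"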
2005 void Scop::addParams(const ParameterSetTy &NewParameters) {
2006 for (const SCEV *Parameter : NewParameters) {
2007 // Normalize the SCEV to get the representing element for an invariant load.
2008 Parameter = extractConstantFactor(Parameter, *SE).second;
2009 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2011 if (Parameters.insert(Parameter))
2012 createParameterId(Parameter);
2016 isl::id Scop::getIdForParam(const SCEV *Parameter) const {
2017 // Normalize the SCEV to get the representing element for an invariant load.
2018 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2019 return ParameterIds.lookup(Parameter);
2022 isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
2023 isl::set DomainContext = getDomains().params();
2024 return C.intersect_params(DomainContext);
2027 bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
2028 return DT.dominates(BB, getEntry());
2031 void Scop::addUserAssumptions(
2032 AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
2033 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2034 for (auto &Assumption : AC.assumptions()) {
2035 auto *CI = dyn_cast_or_null<CallInst>(Assumption);
2036 if (!CI || CI->getNumArgOperands() != 1)
2037 continue;
2039 bool InScop = contains(CI);
2040 if (!InScop && !isDominatedBy(DT, CI->getParent()))
2041 continue;
2043 auto *L = LI.getLoopFor(CI->getParent());
2044 auto *Val = CI->getArgOperand(0);
2045 ParameterSetTy DetectedParams;
2046 if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
2047 ORE.emit(
2048 OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
2049 << "Non-affine user assumption ignored.");
2050 continue;
2053 // Collect all newly introduced parameters.
2054 ParameterSetTy NewParams;
2055 for (auto *Param : DetectedParams) {
2056 Param = extractConstantFactor(Param, *SE).second;
2057 Param = getRepresentingInvariantLoadSCEV(Param);
2058 if (Parameters.count(Param))
2059 continue;
2060 NewParams.insert(Param);
2063 SmallVector<isl_set *, 2> ConditionSets;
2064 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
2065 BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
2066 auto *Dom = InScop ? DomainMap[BB].copy() : Context.copy();
2067 assert(Dom && "Cannot propagate a nullptr.");
2068 bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
2069 InvalidDomainMap, ConditionSets);
2070 isl_set_free(Dom);
2072 if (!Valid)
2073 continue;
2075 isl_set *AssumptionCtx = nullptr;
2076 if (InScop) {
2077 AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
2078 isl_set_free(ConditionSets[0]);
2079 } else {
2080 AssumptionCtx = isl_set_complement(ConditionSets[1]);
2081 AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
2084 // Project out newly introduced parameters as they are not otherwise useful.
2085 if (!NewParams.empty()) {
2086 for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
2087 auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
2088 auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
2089 isl_id_free(Id);
2091 if (!NewParams.count(Param))
2092 continue;
2094 AssumptionCtx =
2095 isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
2098 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
2099 << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
2100 Context = Context.intersect(isl::manage(AssumptionCtx));
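// Sketch of the effect (hypothetical input): a user assumption such as
//
//   __builtin_assume(N > 0);
//
// dominating the SCoP entry reaches this code as an llvm.assume call; its
// affine condition is turned into the parameter constraint [N] -> { : N > 0 }
// and intersected into the context above.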
2104 void Scop::addUserContext() {
2105 if (UserContextStr.empty())
2106 return;
2108 isl::set UserContext = isl::set(getIslCtx(), UserContextStr.c_str());
2109 isl::space Space = getParamSpace();
2110 if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
2111 std::string SpaceStr = Space.to_str();
2112 errs() << "Error: the context provided in -polly-context does not have the "
2113 << "same number of dimensions as the computed context. Due to this "
2114 << "mismatch, the -polly-context option is ignored. Please provide "
2115 << "the context in the parameter space: " << SpaceStr << ".\n";
2116 return;
2119 for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
2120 std::string NameContext = Context.get_dim_name(isl::dim::param, i);
2121 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2123 if (NameContext != NameUserContext) {
2124 std::string SpaceStr = Space.to_str();
2125 errs() << "Error: the name of dimension " << i
2126 << " provided in -polly-context "
2127 << "is '" << NameUserContext << "', but the name in the computed "
2128 << "context is '" << NameContext
2129 << "'. Due to this name mismatch, "
2130 << "the -polly-context option is ignored. Please provide "
2131 << "the context in the parameter space: " << SpaceStr << ".\n";
2132 return;
2135 UserContext = UserContext.set_dim_id(isl::dim::param, i,
2136 Space.get_dim_id(isl::dim::param, i));
2139 Context = Context.intersect(UserContext);
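// Example usage (hypothetical parameter space): for a SCoP with the single
// parameter N, the context can be restricted on the command line with
//
//   -polly-context='[N] -> { : 0 <= N <= 1024 }'
//
// provided the dimension count and names match the computed context, as
// verified above.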
2142 void Scop::buildInvariantEquivalenceClasses() {
2143 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
2145 const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
2146 for (LoadInst *LInst : RIL) {
2147 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
2149 Type *Ty = LInst->getType();
2150 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
2151 if (ClassRep) {
2152 InvEquivClassVMap[LInst] = ClassRep;
2153 continue;
2156 ClassRep = LInst;
2157 InvariantEquivClasses.emplace_back(
2158 InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
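// Illustration (hypothetical IR): two required invariant loads
//
//   %a = load i32, i32* %p
//   %b = load i32, i32* %p
//
// share the pointer SCEV and the type i32, so %b is mapped onto the class
// representative %a and only a single equivalence class is created.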
2162 void Scop::buildContext() {
2163 isl::space Space = isl::space::params_alloc(getIslCtx(), 0);
2164 Context = isl::set::universe(Space);
2165 InvalidContext = isl::set::empty(Space);
2166 AssumedContext = isl::set::universe(Space);
2169 void Scop::addParameterBounds() {
2170 unsigned PDim = 0;
2171 for (auto *Parameter : Parameters) {
2172 ConstantRange SRange = SE->getSignedRange(Parameter);
2173 Context = addRangeBoundsToSet(Context, SRange, PDim++, isl::dim::param);
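// For illustration (hypothetical range): if ScalarEvolution reports the
// signed range [0, 127] for a parameter N, the context above is refined by
// the constraint 0 <= N <= 127.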
2177 static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
2178 std::vector<isl::id> OutermostSizeIds;
2179 for (auto Array : Arrays) {
2180 // To check if an array is a Fortran array, we check if it has an isl_pw_aff
2181 // for its outermost dimension. Fortran arrays will have this since the
2182 // outermost dimension size can be picked up from their runtime description.
2183 // TODO: actually need to check if it has a FAD, but for now this works.
2184 if (Array->getNumberOfDimensions() > 0) {
2185 isl::pw_aff PwAff = Array->getDimensionSizePw(0);
2186 if (!PwAff)
2187 continue;
2189 isl::id Id = PwAff.get_dim_id(isl::dim::param, 0);
2190 assert(!Id.is_null() &&
2191 "Invalid Id for PwAff expression in Fortran array");
2192 OutermostSizeIds.push_back(Id);
2195 return OutermostSizeIds;
2198 // The FORTRAN array size parameters are known to be non-negative.
2199 static isl::set boundFortranArrayParams(isl::set Context,
2200 Scop::array_range Arrays) {
2201 std::vector<isl::id> OutermostSizeIds;
2202 OutermostSizeIds = getFortranArrayIds(Arrays);
2204 for (isl::id Id : OutermostSizeIds) {
2205 int dim = Context.find_dim_by_id(isl::dim::param, Id);
2206 Context = Context.lower_bound_si(isl::dim::param, dim, 0);
2209 return Context;
2212 void Scop::realignParams() {
2213 if (PollyIgnoreParamBounds)
2214 return;
2216 // Add all parameters into a common model.
2217 isl::space Space = getFullParamSpace();
2219 // Align the parameters of all data structures to the model.
2220 Context = Context.align_params(Space);
2222 // Bound the size of the fortran array dimensions.
2223 Context = boundFortranArrayParams(Context, arrays());
2225 // As all parameters are known, add bounds to them.
2226 addParameterBounds();
2228 for (ScopStmt &Stmt : *this)
2229 Stmt.realignParams();
2230 // Simplify the schedule according to the context too.
2231 Schedule = Schedule.gist_domain_params(getContext());
2234 static isl::set simplifyAssumptionContext(isl::set AssumptionContext,
2235 const Scop &S) {
2236 // If we have modeled all blocks in the SCoP that have side effects we can
2237 // simplify the context with the constraints that are needed for anything to
2238 // be executed at all. However, if we have error blocks in the SCoP we already
2239 // assumed some parameter combinations cannot occur and removed them from the
2240 // domains, thus we cannot use the remaining domain to simplify the
2241 // assumptions.
2242 if (!S.hasErrorBlock()) {
2243 auto DomainParameters = S.getDomains().params();
2244 AssumptionContext = AssumptionContext.gist_params(DomainParameters);
2247 AssumptionContext = AssumptionContext.gist_params(S.getContext());
2248 return AssumptionContext;
2251 void Scop::simplifyContexts() {
2252 // The parameter constraints of the iteration domains give us a set of
2253 // constraints that need to hold for all cases where at least a single
2254 // statement iteration is executed in the whole scop. We now simplify the
2255 // assumed context under the assumption that such constraints hold and at
2256 // least a single statement iteration is executed. For cases where no
2257 // statement instances are executed, the assumptions we have taken about
2258 // the executed code do not matter and can be changed.
2260 // WARNING: This only holds if the assumptions we have taken do not reduce
2261 // the set of statement instances that are executed. Otherwise we
2262 // may run into a case where the iteration domains suggest that
2263 // for a certain set of parameter constraints no code is executed,
2264 // but in the original program some computation would have been
2265 // performed. In such a case, modifying the run-time conditions and
2266 // possibly influencing the run-time check may cause certain scops
2267 // to not be executed.
2269 // Example:
2271 // When delinearizing the following code:
2273 // for (long i = 0; i < 100; i++)
2274 // for (long j = 0; j < m; j++)
2275 // A[i+p][j] = 1.0;
2277 // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2278 // otherwise we would access out of bound data. Now, knowing that code is
2279 // only executed for the case m >= 0, it is sufficient to assume p >= 0.
2280 AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
2281 InvalidContext = InvalidContext.align_params(getParamSpace());
2284 /// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
2286 /// @return True if more accesses should be added, false if we reached the
2287 /// maximal number of run-time checks to be generated.
2288 static bool buildMinMaxAccess(isl::set Set,
2289 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
2290 isl::pw_multi_aff MinPMA, MaxPMA;
2291 isl::pw_aff LastDimAff;
2292 isl::aff OneAff;
2293 unsigned Pos;
2295 Set = Set.remove_divs();
2296 polly::simplify(Set);
2298 if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
2299 Set = Set.simple_hull();
2301 // Restrict the number of parameters involved in the access as the lexmin/
2302 // lexmax computation will take too long if this number is high.
2304 // Experiments with a simple test case using an i7 4800MQ:
2306 //   #Parameters involved | Time (in sec)
2307 //                      6 |          0.01
2308 //                      7 |          0.04
2309 //                      8 |          0.12
2310 //                      9 |          0.40
2311 //                     10 |          1.54
2312 //                     11 |          6.78
2313 //                     12 |         30.38
2315 if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
2316 unsigned InvolvedParams = 0;
2317 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
2318 if (Set.involves_dims(isl::dim::param, u, 1))
2319 InvolvedParams++;
2321 if (InvolvedParams > RunTimeChecksMaxParameters)
2322 return false;
2325 MinPMA = Set.lexmin_pw_multi_aff();
2326 MaxPMA = Set.lexmax_pw_multi_aff();
2328 MinPMA = MinPMA.coalesce();
2329 MaxPMA = MaxPMA.coalesce();
2331 // Adjust the last dimension of the maximal access by one as we want to
2332 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2333 // we test during code generation might now point after the end of the
2334 // allocated array but we will never dereference it anyway.
2335 assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
2336 "Assumed at least one output dimension");
2338 Pos = MaxPMA.dim(isl::dim::out) - 1;
2339 LastDimAff = MaxPMA.get_pw_aff(Pos);
2340 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
2341 OneAff = OneAff.add_constant_si(1);
2342 LastDimAff = LastDimAff.add(OneAff);
2343 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
2345 if (!MinPMA || !MaxPMA)
2346 return false;
2348 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
2350 return true;
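// Illustration (hypothetical access set): for Set = { A[i] : 0 <= i < N } the
// pair pushed above is roughly
//
//   MinPMA: [N] -> { A[0] : N >= 1 }
//   MaxPMA: [N] -> { A[N] : N >= 1 }   (last dimension already incremented)
//
// so code generation can compare the half-open range [A[0], A[N]) against
// the ranges of other accesses.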
2353 static isl::set getAccessDomain(MemoryAccess *MA) {
2354 isl::set Domain = MA->getStatement()->getDomain();
2355 Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
2356 return Domain.reset_tuple_id();
2359 /// Wrapper function to calculate minimal/maximal accesses to each array.
2360 static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
2361 Scop::MinMaxVectorTy &MinMaxAccesses) {
2362 MinMaxAccesses.reserve(AliasGroup.size());
2364 isl::union_set Domains = S.getDomains();
2365 isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());
2367 for (MemoryAccess *MA : AliasGroup)
2368 Accesses = Accesses.add_map(MA->getAccessRelation());
2370 Accesses = Accesses.intersect_domain(Domains);
2371 isl::union_set Locations = Accesses.range();
2373 bool LimitReached = false;
2374 for (isl::set Set : Locations.get_set_list()) {
2375 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, S);
2376 if (LimitReached)
2377 break;
2380 return !LimitReached;
2383 /// Helper to treat non-affine regions and basic blocks the same.
2385 ///{
2387 /// Return the block that is the representing block for @p RN.
2388 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2389 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2390 : RN->getNodeAs<BasicBlock>();
2393 /// Return the @p idx'th block that is executed after @p RN.
2394 static inline BasicBlock *
2395 getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
2396 if (RN->isSubRegion()) {
2397 assert(idx == 0);
2398 return RN->getNodeAs<Region>()->getExit();
2400 return TI->getSuccessor(idx);
2403 /// Return the smallest loop surrounding @p RN.
2404 static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
2405 if (!RN->isSubRegion()) {
2406 BasicBlock *BB = RN->getNodeAs<BasicBlock>();
2407 Loop *L = LI.getLoopFor(BB);
2409 // Unreachable statements are not considered to belong to an LLVM loop, as
2410 // they are not part of an actual loop in the control flow graph.
2411 // Nevertheless, we handle certain unreachable statements that are common
2412 // when modeling run-time bounds checks as being part of the loop to be
2413 // able to model them and to later eliminate the run-time bounds checks.
2415 // Specifically, for basic blocks that terminate in an unreachable and
2416 // where the immediate predecessor is part of a loop, we assume these
2417 // basic blocks belong to the loop the predecessor belongs to. This
2418 // allows us to model the following code.
2420 // for (i = 0; i < N; i++) {
2421 // if (i > 1024)
2422 // abort(); <- this abort might be translated to an
2423 // unreachable
2425 // A[i] = ...
2426 // }
2427 if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
2428 L = LI.getLoopFor(BB->getPrevNode());
2429 return L;
2432 Region *NonAffineSubRegion = RN->getNodeAs<Region>();
2433 Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
2434 while (L && NonAffineSubRegion->contains(L))
2435 L = L->getParentLoop();
2436 return L;
2439 /// Get the number of blocks in @p L.
2441 /// The number of blocks in a loop is the number of basic blocks actually
2442 /// belonging to the loop, as well as all single basic blocks that the loop
2443 /// exits to and which terminate in an unreachable instruction. We do not
2444 /// allow such basic blocks in the exit of a scop, hence they belong to the
2445 /// scop and represent run-time conditions which we want to model and
2446 /// subsequently speculate away.
2448 /// @see getRegionNodeLoop for additional details.
2449 unsigned getNumBlocksInLoop(Loop *L) {
2450 unsigned NumBlocks = L->getNumBlocks();
2451 SmallVector<BasicBlock *, 4> ExitBlocks;
2452 L->getExitBlocks(ExitBlocks);
2454 for (auto ExitBlock : ExitBlocks) {
2455 if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2456 NumBlocks++;
2458 return NumBlocks;
2461 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2462 if (!RN->isSubRegion())
2463 return 1;
2465 Region *R = RN->getNodeAs<Region>();
2466 return std::distance(R->block_begin(), R->block_end());
2469 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2470 const DominatorTree &DT) {
2471 if (!RN->isSubRegion())
2472 return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2473 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2474 if (isErrorBlock(*BB, R, LI, DT))
2475 return true;
2476 return false;
2479 ///}
2481 isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
2482 return getDomainConditions(Stmt->getEntryBlock());
2485 isl::set Scop::getDomainConditions(BasicBlock *BB) const {
2486 auto DIt = DomainMap.find(BB);
2487 if (DIt != DomainMap.end())
2488 return DIt->getSecond();
2490 auto &RI = *R.getRegionInfo();
2491 auto *BBR = RI.getRegionFor(BB);
2492 while (BBR->getEntry() == BB)
2493 BBR = BBR->getParent();
2494 return getDomainConditions(BBR->getEntry());
2497 bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
2498 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2499 bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
2500 auto *EntryBB = R->getEntry();
2501 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
2502 int LD = getRelativeLoopDepth(L);
2503 auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD + 1));
2505 while (LD-- >= 0) {
2506 L = L->getParentLoop();
2509 InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
2510 DomainMap[EntryBB] = isl::manage(S);
2512 if (IsOnlyNonAffineRegion)
2513 return !containsErrorBlock(R->getNode(), *R, LI, DT);
2515 if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
2516 return false;
2518 if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
2519 return false;
2521 // Error blocks and blocks dominated by them have been assumed to never be
2522 // executed. Representing them in the Scop does not add any value. In fact,
2523 // it is likely to cause issues during construction of the ScopStmts. The
2524 // contents of error blocks have not been verified to be expressible and
2525 // will cause problems when building up a ScopStmt for them.
2526 // Furthermore, basic blocks dominated by error blocks may reference
2527 // instructions in the error block which, if the error block is not modeled,
2528 // can themselves not be constructed properly. To this end we will replace
2529 // the domains of error blocks and those only reachable via error blocks
2530 // with an empty set. Additionally, we will record for each block under which
2531 // parameter combination it would be reached via an error block in its
2532 // InvalidDomain. This information is needed during load hoisting.
2533 if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
2534 return false;
2536 return true;
2539 /// Adjust the dimensions of @p Dom that was constructed for @p OldL
2540 /// to be compatible to domains constructed for loop @p NewL.
2542 /// This function assumes @p NewL and @p OldL are equal or there is a CFG
2543 /// edge from @p OldL to @p NewL.
2544 static isl::set adjustDomainDimensions(Scop &S, isl::set Dom, Loop *OldL,
2545 Loop *NewL) {
2546 // If the loops are the same there is nothing to do.
2547 if (NewL == OldL)
2548 return Dom;
2550 int OldDepth = S.getRelativeLoopDepth(OldL);
2551 int NewDepth = S.getRelativeLoopDepth(NewL);
2552 // If both loops are non-affine loops there is nothing to do.
2553 if (OldDepth == -1 && NewDepth == -1)
2554 return Dom;
2556 // Distinguish three cases:
2557 // 1) The depth is the same but the loops are not.
2558 // => One loop was left, one was entered.
2559 // 2) The depth increased from OldL to NewL.
2560 // => One loop was entered, none was left.
2561 // 3) The depth decreased from OldL to NewL.
2562 // => Loops were left; the difference of the depths defines how many.
2563 if (OldDepth == NewDepth) {
2564 assert(OldL->getParentLoop() == NewL->getParentLoop());
2565 Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
2566 Dom = Dom.add_dims(isl::dim::set, 1);
2567 } else if (OldDepth < NewDepth) {
2568 assert(OldDepth + 1 == NewDepth);
2569 auto &R = S.getRegion();
2570 (void)R;
2571 assert(NewL->getParentLoop() == OldL ||
2572 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
2573 Dom = Dom.add_dims(isl::dim::set, 1);
2574 } else {
2575 assert(OldDepth > NewDepth);
2576 int Diff = OldDepth - NewDepth;
2577 int NumDim = Dom.n_dim();
2578 assert(NumDim >= Diff);
2579 Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
2582 return Dom;
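// A small sketch of case 3) above (hypothetical domains): with OldL at
// depth 2 and NewL at depth 0, a domain { [i0, i1, i2] } loses its two
// innermost dimensions and becomes { [i0] }.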
2585 bool Scop::propagateInvalidStmtDomains(
2586 Region *R, DominatorTree &DT, LoopInfo &LI,
2587 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2588 ReversePostOrderTraversal<Region *> RTraversal(R);
2589 for (auto *RN : RTraversal) {
2591 // Recurse for affine subregions but go on for basic blocks and non-affine
2592 // subregions.
2593 if (RN->isSubRegion()) {
2594 Region *SubRegion = RN->getNodeAs<Region>();
2595 if (!isNonAffineSubRegion(SubRegion)) {
2596 propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
2597 continue;
2601 bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
2602 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2603 isl::set &Domain = DomainMap[BB];
2604 assert(Domain && "Cannot propagate a nullptr");
2606 isl::set InvalidDomain = InvalidDomainMap[BB];
2608 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
2610 if (!IsInvalidBlock) {
2611 InvalidDomain = InvalidDomain.intersect(Domain);
2612 } else {
2613 InvalidDomain = Domain;
2614 isl::set DomPar = Domain.params();
2615 recordAssumption(ERRORBLOCK, DomPar, BB->getTerminator()->getDebugLoc(),
2616 AS_RESTRICTION);
2617 Domain = isl::set::empty(Domain.get_space());
2620 if (InvalidDomain.is_empty()) {
2621 InvalidDomainMap[BB] = InvalidDomain;
2622 continue;
2625 auto *BBLoop = getRegionNodeLoop(RN, LI);
2626 auto *TI = BB->getTerminator();
2627 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
2628 for (unsigned u = 0; u < NumSuccs; u++) {
2629 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2631 // Skip successors outside the SCoP.
2632 if (!contains(SuccBB))
2633 continue;
2635 // Skip backedges.
2636 if (DT.dominates(SuccBB, BB))
2637 continue;
2639 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2641 auto AdjustedInvalidDomain =
2642 adjustDomainDimensions(*this, InvalidDomain, BBLoop, SuccBBLoop);
2644 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
2645 SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
2646 SuccInvalidDomain = SuccInvalidDomain.coalesce();
2647 unsigned NumDisjuncts = SuccInvalidDomain.n_basic_set();
2649 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
2651 // Check if the maximal number of domain disjunctions was reached.
2652 // In case this happens we will bail.
2653 if (NumDisjuncts < MaxDisjunctsInDomain)
2654 continue;
2656 InvalidDomainMap.erase(BB);
2657 invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
2658 return false;
2661 InvalidDomainMap[BB] = InvalidDomain;
2664 return true;
2667 void Scop::propagateDomainConstraintsToRegionExit(
2668 BasicBlock *BB, Loop *BBLoop,
2669 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
2670 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2671 // Check if the block @p BB is the entry of a region. If so we propagate its
2672 // domain to the exit block of the region. Otherwise we are done.
2673 auto *RI = R.getRegionInfo();
2674 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
2675 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
2676 if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
2677 return;
2679 // Do not propagate the domain if there is a loop backedge inside the region
2680 // that would prevent the exit block from being executed.
2681 auto *L = BBLoop;
2682 while (L && contains(L)) {
2683 SmallVector<BasicBlock *, 4> LatchBBs;
2684 BBLoop->getLoopLatches(LatchBBs);
2685 for (auto *LatchBB : LatchBBs)
2686 if (BB != LatchBB && BBReg->contains(LatchBB))
2687 return;
2688 L = L->getParentLoop();
2691 isl::set Domain = DomainMap[BB];
2692 assert(Domain && "Cannot propagate a nullptr");
2694 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());
2696 // Since the dimensions of @p BB and @p ExitBB might be different we have to
2697 // adjust the domain before we can propagate it.
2698 isl::set AdjustedDomain =
2699 adjustDomainDimensions(*this, Domain, BBLoop, ExitBBLoop);
2700 isl::set &ExitDomain = DomainMap[ExitBB];
2702 // If the exit domain is not yet created we set it; otherwise we "add" the
2703 // current domain.
2704 ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
2706 // Initialize the invalid domain.
2707 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
2709 FinishedExitBlocks.insert(ExitBB);
2712 bool Scop::buildDomainsWithBranchConstraints(
2713 Region *R, DominatorTree &DT, LoopInfo &LI,
2714 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2715 // To create the domain for each block in R we iterate over all blocks and
2716 // subregions in R and propagate the conditions under which the current region
2717 // element is executed. To this end we iterate in reverse post order over R as
2718 // it ensures that we first visit all predecessors of a region node (either a
2719 // basic block or a subregion) before we visit the region node itself.
2720 // Initially, only the domain for the SCoP region entry block is set and from
2721 // there we propagate the current domain to all successors, however we add the
2722 // condition that the successor is actually executed next.
2723 // As we are only interested in non-loop carried constraints here we can
2724 // simply skip loop back edges.
2726 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
2727 ReversePostOrderTraversal<Region *> RTraversal(R);
2728 for (auto *RN : RTraversal) {
2729 // Recurse for affine subregions but go on for basic blocks and non-affine
2730 // subregions.
2731 if (RN->isSubRegion()) {
2732 Region *SubRegion = RN->getNodeAs<Region>();
2733 if (!isNonAffineSubRegion(SubRegion)) {
2734 if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
2735 InvalidDomainMap))
2736 return false;
2737 continue;
2741 if (containsErrorBlock(RN, getRegion(), LI, DT))
2742 HasErrorBlock = true;
2744 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2745 Instruction *TI = BB->getTerminator();
2747 if (isa<UnreachableInst>(TI))
2748 continue;
2750 isl::set Domain = DomainMap.lookup(BB);
2751 if (!Domain)
2752 continue;
2753 MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));
2755 auto *BBLoop = getRegionNodeLoop(RN, LI);
2756 // Propagate the domain from BB directly to blocks that have a superset
2757 // domain, at the moment only region exit nodes of regions that start in BB.
2758 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
2759 InvalidDomainMap);
2761 // If all successors of BB have been assigned a domain through the propagation
2762 // above we do not need to build condition sets but can just skip this
2763 // block. However, it is important to note that this is a local property
2764 // with regards to the region @p R. To this end FinishedExitBlocks is a
2765 // local variable.
2766 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
2767 return FinishedExitBlocks.count(SuccBB);
2769 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
2770 continue;
2772 // Build the condition sets for the successor nodes of the current region
2773 // node. If it is a non-affine subregion we will always execute the single
2774 // exit node, hence the single entry node domain is the condition set. For
2775 // basic blocks we use the helper function buildConditionSets.
2776 SmallVector<isl_set *, 8> ConditionSets;
2777 if (RN->isSubRegion())
2778 ConditionSets.push_back(Domain.copy());
2779 else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
2780 InvalidDomainMap, ConditionSets))
2781 return false;
2783 // Now iterate over the successors and set their initial domain based on
2784 // their condition set. We skip back edges here and have to be careful when
2785 // we leave a loop not to keep constraints over a dimension that doesn't
2786 // exist anymore.
2787 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
2788 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
2789 isl::set CondSet = isl::manage(ConditionSets[u]);
2790 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2792 // Skip blocks outside the region.
2793 if (!contains(SuccBB))
2794 continue;
2796 // If we propagate the domain of some block to "SuccBB" we do not have to
2797 // adjust the domain.
2798 if (FinishedExitBlocks.count(SuccBB))
2799 continue;
2801 // Skip back edges.
2802 if (DT.dominates(SuccBB, BB))
2803 continue;
2805 Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());
2807 CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop);
2809 // Set the domain for the successor or merge it with an existing domain in
2810 // case there are multiple paths (without loop back edges) to the
2811 // successor block.
2812 isl::set &SuccDomain = DomainMap[SuccBB];
2814 if (SuccDomain) {
2815 SuccDomain = SuccDomain.unite(CondSet).coalesce();
2816 } else {
2817 // Initialize the invalid domain.
2818 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
2819 SuccDomain = CondSet;
2822 SuccDomain = SuccDomain.detect_equalities();
2824 // Check if the maximal number of domain disjunctions was reached.
2825 // In case this happens we will clean up and bail.
2826 if (SuccDomain.n_basic_set() < MaxDisjunctsInDomain)
2827 continue;
2829 invalidate(COMPLEXITY, DebugLoc());
2830 while (++u < ConditionSets.size())
2831 isl_set_free(ConditionSets[u]);
2832 return false;
2836 return true;
2839 isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
2840 DominatorTree &DT,
2841 LoopInfo &LI) {
2842 // If @p BB is the ScopEntry we are done
2843 if (R.getEntry() == BB)
2844 return isl::set::universe(Domain.get_space());
2846 // The region info of this function.
2847 auto &RI = *R.getRegionInfo();
2849 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
2851 // A domain to collect all predecessor domains, thus all conditions under
2852 // which the block is executed. To this end we start with the empty domain.
2853 isl::set PredDom = isl::set::empty(Domain.get_space());
2855 // Set of regions of which the entry block domain has been propagated to BB.
2856 // All predecessors inside any of the regions can be skipped.
2857 SmallSet<Region *, 8> PropagatedRegions;
2859 for (auto *PredBB : predecessors(BB)) {
2860 // Skip backedges.
2861 if (DT.dominates(BB, PredBB))
2862 continue;
2864 // If the predecessor is in a region we used for propagation we can skip it.
2865 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
2866 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
2867 PredBBInRegion)) {
2868 continue;
2871 // Check if there is a valid region we can use for propagation, thus look
2872 // for a region that contains the predecessor and has @p BB as exit block.
2873 auto *PredR = RI.getRegionFor(PredBB);
2874 while (PredR->getExit() != BB && !PredR->contains(BB))
2875 PredR = PredR->getParent();
2877 // If a valid region for propagation was found use the entry of that region
2878 // for propagation, otherwise the PredBB directly.
2879 if (PredR->getExit() == BB) {
2880 PredBB = PredR->getEntry();
2881 PropagatedRegions.insert(PredR);
2884 isl::set PredBBDom = getDomainConditions(PredBB);
2885 Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
2886 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
2887 PredDom = PredDom.unite(PredBBDom);
2890 return PredDom;
2893 bool Scop::propagateDomainConstraints(
2894 Region *R, DominatorTree &DT, LoopInfo &LI,
2895 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2896 // Iterate over the region R and propagate the domain constrains from the
2897 // predecessors to the current node. In contrast to the
2898 // buildDomainsWithBranchConstraints function, this one will pull the domain
2899 // information from the predecessors instead of pushing it to the successors.
2900 // Additionally, we assume the domains to be already present in the domain
2901 // map here. However, we iterate again in reverse post order so we know all
2902 // predecessors have been visited before a block or non-affine subregion is
2903 // visited.
2905 ReversePostOrderTraversal<Region *> RTraversal(R);
2906 for (auto *RN : RTraversal) {
2907 // Recurse for affine subregions but go on for basic blocks and non-affine
2908 // subregions.
2909 if (RN->isSubRegion()) {
2910 Region *SubRegion = RN->getNodeAs<Region>();
2911 if (!isNonAffineSubRegion(SubRegion)) {
2912 if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
2913 return false;
2914 continue;
2918 BasicBlock *BB = getRegionNodeBasicBlock(RN);
2919 isl::set &Domain = DomainMap[BB];
2920 assert(Domain);
2922 // Under the union of all predecessor conditions we can reach this block.
2923 isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
2924 Domain = Domain.intersect(PredDom).coalesce();
2925 Domain = Domain.align_params(getParamSpace());
2927 Loop *BBLoop = getRegionNodeLoop(RN, LI);
2928 if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
2929 if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
2930 return false;
2933 return true;
2936 /// Create a map to map from a given iteration to a subsequent iteration.
2938 /// This map maps from SetSpace -> SetSpace where the dimension @p Dim
2939 /// is incremented by one and all other dimensions are equal, e.g.,
2940 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
2942 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
2943 static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
2944 isl::space MapSpace = SetSpace.map_from_set();
2945 isl::map NextIterationMap = isl::map::universe(MapSpace);
2946 for (unsigned u = 0; u < NextIterationMap.dim(isl::dim::in); u++)
2947 if (u != Dim)
2948 NextIterationMap =
2949 NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
2950 isl::constraint C =
2951 isl::constraint::alloc_equality(isl::local_space(MapSpace));
2952 C = C.set_constant_si(1);
2953 C = C.set_coefficient_si(isl::dim::in, Dim, 1);
2954 C = C.set_coefficient_si(isl::dim::out, Dim, -1);
2955 NextIterationMap = NextIterationMap.add_constraint(C);
2956 return NextIterationMap;
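// In isl notation, for SetSpace { [i0, i1] } and Dim = 1 the result is
//
//   { [i0, i1] -> [i0, 1 + i1] }
//
// since the equality constraint above encodes out = in + 1 on dimension Dim
// and the equate() calls keep all other dimensions unchanged.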
2959 bool Scop::addLoopBoundsToHeaderDomain(
2960 Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
2961 int LoopDepth = getRelativeLoopDepth(L);
2962 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
2964 BasicBlock *HeaderBB = L->getHeader();
2965 assert(DomainMap.count(HeaderBB));
2966 isl::set &HeaderBBDom = DomainMap[HeaderBB];
2968 isl::map NextIterationMap =
2969 createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
2971 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
2973 SmallVector<BasicBlock *, 4> LatchBlocks;
2974 L->getLoopLatches(LatchBlocks);
2976 for (BasicBlock *LatchBB : LatchBlocks) {
2977 // If the latch is only reachable via error statements we skip it.
2978 isl::set LatchBBDom = DomainMap.lookup(LatchBB);
2979 if (!LatchBBDom)
2980 continue;
2982 isl::set BackedgeCondition = nullptr;
2984 Instruction *TI = LatchBB->getTerminator();
2985 BranchInst *BI = dyn_cast<BranchInst>(TI);
2986 assert(BI && "Only branch instructions allowed in loop latches");
2988 if (BI->isUnconditional())
2989 BackedgeCondition = LatchBBDom;
2990 else {
2991 SmallVector<isl_set *, 8> ConditionSets;
2992 int idx = BI->getSuccessor(0) != HeaderBB;
2993 if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
2994 InvalidDomainMap, ConditionSets))
2995 return false;
2997 // Free the non back edge condition set as we do not need it.
2998 isl_set_free(ConditionSets[1 - idx]);
3000 BackedgeCondition = isl::manage(ConditionSets[idx]);
3003 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3004 assert(LatchLoopDepth >= LoopDepth);
3005 BackedgeCondition = BackedgeCondition.project_out(
3006 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3007 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3010 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
3011 for (int i = 0; i < LoopDepth; i++)
3012 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3014 isl::set UnionBackedgeConditionComplement =
3015 UnionBackedgeCondition.complement();
3016 UnionBackedgeConditionComplement =
3017 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth, 0);
3019 UnionBackedgeConditionComplement =
3020 UnionBackedgeConditionComplement.apply(ForwardMap);
3021 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3022 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3024 auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
3025 HeaderBBDom = Parts.second;
3027 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3028 // the bounded assumptions to the context as they are already implied by the
3029 // <nsw> tag.
3030 if (Affinator.hasNSWAddRecForLoop(L))
3031 return true;
3033 isl::set UnboundedCtx = Parts.first.params();
3034 recordAssumption(INFINITELOOP, UnboundedCtx,
3035 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3036 return true;
3039 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3040 Value *PointerBase = MA->getOriginalBaseAddr();
3042 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3043 if (!PointerBaseInst)
3044 return nullptr;
3046 auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3047 if (!BasePtrStmt)
3048 return nullptr;
3050 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3053 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3054 isl::union_map Writes) {
3055 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3056 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3059 Value *BaseAddr = MA->getOriginalBaseAddr();
3060 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3061 if (!isa<LoadInst>(BasePtrInst))
3062 return contains(BasePtrInst);
3064 return false;
3067 bool Scop::buildAliasChecks(AliasAnalysis &AA) {
3068 if (!PollyUseRuntimeAliasChecks)
3069 return true;
3071 if (buildAliasGroups(AA)) {
3072 // Aliasing assumptions do not go through addAssumption but we still want to
3073 // collect statistics so we do it here explicitly.
3074 if (MinMaxAliasGroups.size())
3075 AssumptionsAliasing++;
3076 return true;
3079 // If a problem occurs while building the alias groups we need to delete
3080 // this SCoP and pretend it wasn't valid in the first place. To this end
3081 // we make the assumed context infeasible.
3082 invalidate(ALIASING, DebugLoc());
3084 LLVM_DEBUG(
3085 dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3086 << " could not be created as the number of parameters involved "
3087 "is too high. The SCoP will be "
3088 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3089 "the maximal number of parameters but be advised that the "
3090 "compile time might increase exponentially.\n\n");
3091 return false;
3094 std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3095 Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
3096 AliasSetTracker AST(AA);
3098 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3099 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3100 for (ScopStmt &Stmt : *this) {
3102 isl::set StmtDomain = Stmt.getDomain();
3103 bool StmtDomainEmpty = StmtDomain.is_empty();
3105 // Statements with an empty domain will never be executed.
3106 if (StmtDomainEmpty)
3107 continue;
3109 for (MemoryAccess *MA : Stmt) {
3110 if (MA->isScalarKind())
3111 continue;
3112 if (!MA->isRead())
3113 HasWriteAccess.insert(MA->getScopArrayInfo());
3114 MemAccInst Acc(MA->getAccessInstruction());
3115 if (MA->isRead() && isa<MemTransferInst>(Acc))
3116 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3117 else
3118 PtrToAcc[Acc.getPointerOperand()] = MA;
3119 AST.add(Acc);
3120 }
3121 }
3123 AliasGroupVectorTy AliasGroups;
3124 for (AliasSet &AS : AST) {
3125 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3126 continue;
3127 AliasGroupTy AG;
3128 for (auto &PR : AS)
3129 AG.push_back(PtrToAcc[PR.getValue()]);
3130 if (AG.size() < 2)
3131 continue;
3132 AliasGroups.push_back(std::move(AG));
3133 }
3135 return std::make_tuple(AliasGroups, HasWriteAccess);
3136 }
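/// Split alias groups whose accesses have disjoint iteration domains, since
/// such accesses can never be executed together.
///
/// Example (illustrative): if accesses A and B only run for 0 <= i < 10 and
/// accesses C and D only run for 20 <= i < 30, the group {A, B, C, D} is
/// split into {A, B} and {C, D}, and no runtime check between the two
/// halves is needed.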
3138 void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3139 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3140 AliasGroupTy NewAG;
3141 AliasGroupTy &AG = AliasGroups[u];
3142 AliasGroupTy::iterator AGI = AG.begin();
3143 isl::set AGDomain = getAccessDomain(*AGI);
3144 while (AGI != AG.end()) {
3145 MemoryAccess *MA = *AGI;
3146 isl::set MADomain = getAccessDomain(MA);
3147 if (AGDomain.is_disjoint(MADomain)) {
3148 NewAG.push_back(MA);
3149 AGI = AG.erase(AGI);
3150 } else {
3151 AGDomain = AGDomain.unite(MADomain);
3152 AGI++;
3153 }
3154 }
3155 if (NewAG.size() > 1)
3156 AliasGroups.push_back(std::move(NewAG));
3157 }
3158 }
3160 bool Scop::buildAliasGroups(AliasAnalysis &AA) {
3161 // To create sound alias checks we perform the following steps:
3162 // o) We partition each group into read only and non read only accesses.
3163 // o) For each group with more than one base pointer we then compute minimal
3164 // and maximal accesses to each array of a group in read only and non
3165 // read only partitions separately (see the example below).
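// For example (illustrative), for a group with a written array A and a read
// array B, the emitted runtime check takes the form
//   max(A) <= min(B) || max(B) <= min(A)
// i.e. when the check holds, the accessed address ranges do not overlap.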
3166 AliasGroupVectorTy AliasGroups;
3167 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3169 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
3171 splitAliasGroupsByDomain(AliasGroups);
3173 for (AliasGroupTy &AG : AliasGroups) {
3174 if (!hasFeasibleRuntimeContext())
3175 return false;
3177 {
3178 IslMaxOperationsGuard MaxOpGuard(getIslCtx().get(), OptComputeOut);
3179 bool Valid = buildAliasGroup(AG, HasWriteAccess);
3180 if (!Valid)
3181 return false;
3182 }
3183 if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota) {
3184 invalidate(COMPLEXITY, DebugLoc());
3185 return false;
3186 }
3187 }
3189 return true;
3190 }
3192 bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
3193 DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3194 AliasGroupTy ReadOnlyAccesses;
3195 AliasGroupTy ReadWriteAccesses;
3196 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3197 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3199 if (AliasGroup.size() < 2)
3200 return true;
3202 for (MemoryAccess *Access : AliasGroup) {
3203 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3204 Access->getAccessInstruction())
3205 << "Possibly aliasing pointer, use restrict keyword.");
3206 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3207 if (HasWriteAccess.count(Array)) {
3208 ReadWriteArrays.insert(Array);
3209 ReadWriteAccesses.push_back(Access);
3210 } else {
3211 ReadOnlyArrays.insert(Array);
3212 ReadOnlyAccesses.push_back(Access);
3213 }
3214 }
3216 // If there are no read-only pointers, and less than two read-write pointers,
3217 // no alias check is needed.
3218 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3219 return true;
3221 // If there is no read-write pointer, no alias check is needed.
3222 if (ReadWriteArrays.empty())
3223 return true;
3225 // For non-affine accesses, no alias check can be generated as we cannot
3226 // compute a sufficiently tight lower and upper bound: bail out.
3227 for (MemoryAccess *MA : AliasGroup) {
3228 if (!MA->isAffine()) {
3229 invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3230 MA->getAccessInstruction()->getParent());
3231 return false;
3232 }
3233 }
3235 // Ensure that for all memory accesses for which we generate alias checks,
3236 // their base pointers are available.
3237 for (MemoryAccess *MA : AliasGroup) {
3238 if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
3239 addRequiredInvariantLoad(
3240 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3241 }
3243 MinMaxAliasGroups.emplace_back();
3244 MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
3245 MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
3246 MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
3248 bool Valid;
3250 Valid =
3251 calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
3253 if (!Valid)
3254 return false;
3256 // Bail out if the number of values we need to compare is too large.
3257 // This is important as the number of comparisons grows quadratically with
3258 // the number of values we need to compare.
3259 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3260 RunTimeChecksMaxArraysPerGroup)
3261 return false;
3263 Valid =
3264 calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
3266 if (!Valid)
3267 return false;
3269 return true;
3270 }
3272 /// Get the smallest loop that contains @p S but is not in @p S.
3273 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3274 // Start with the smallest loop containing the entry and expand that
3275 // loop until it contains all blocks in the region. If there is a loop
3276 // containing all blocks in the region, check if it is itself contained,
3277 // and if so take the parent loop, as it will be the smallest loop
3278 // containing the region but not contained in it.
3279 Loop *L = LI.getLoopFor(S.getEntry());
3280 while (L) {
3281 bool AllContained = true;
3282 for (auto *BB : S.blocks())
3283 AllContained &= L->contains(BB);
3284 if (AllContained)
3285 break;
3286 L = L->getParentLoop();
3287 }
3289 return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
3290 }
3292 int Scop::NextScopID = 0;
3294 std::string Scop::CurrentFunc;
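// Example (illustrative): SCoP IDs are assigned per function; the first
// SCoP in a function gets ID 0, the second ID 1, and the counter restarts
// at 0 once a SCoP from a different function is numbered.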
3296 int Scop::getNextID(std::string ParentFunc) {
3297 if (ParentFunc != CurrentFunc) {
3298 CurrentFunc = ParentFunc;
3299 NextScopID = 0;
3300 }
3301 return NextScopID++;
3302 }
3304 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3305 DominatorTree &DT, ScopDetection::DetectionContext &DC,
3306 OptimizationRemarkEmitter &ORE)
3307 : IslCtx(isl_ctx_alloc(), isl_ctx_free), SE(&ScalarEvolution), DT(&DT),
3308 R(R), name(None), HasSingleExitEdge(R.getExitingBlock()), DC(DC),
3309 ORE(ORE), Affinator(this, LI),
3310 ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
3311 if (IslOnErrorAbort)
3312 isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT);
3313 buildContext();
3314 }
3316 Scop::~Scop() = default;
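/// Illustrative sketch of the folding performed below (hypothetical array
/// and accesses, not taken from the code): if a two-dimensional array is
/// only ever accessed as A[2 * i][j], the constant factor 2 in the outer
/// dimension can be folded to the right by rewriting the accesses to
/// A'[i][j] and multiplying the innermost dimension size by 2.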
3318 void Scop::foldSizeConstantsToRight() {
3319 isl::union_set Accessed = getAccesses().range();
3321 for (auto Array : arrays()) {
3322 if (Array->getNumberOfDimensions() <= 1)
3323 continue;
3325 isl::space Space = Array->getSpace();
3326 Space = Space.align_params(Accessed.get_space());
3328 if (!Accessed.contains(Space))
3329 continue;
3331 isl::set Elements = Accessed.extract_set(Space);
3332 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
3334 std::vector<int> Int;
3335 int Dims = Elements.dim(isl::dim::set);
3336 for (int i = 0; i < Dims; i++) {
3337 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
3338 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
3339 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
3341 isl::basic_set DimHull = DimOnly.affine_hull();
3343 if (i == Dims - 1) {
3344 Int.push_back(1);
3345 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
3346 continue;
3347 }
3349 if (DimHull.dim(isl::dim::div) == 1) {
3350 isl::aff Diff = DimHull.get_div(0);
3351 isl::val Val = Diff.get_denominator_val();
3353 int ValInt = 1;
3354 if (Val.is_int()) {
3355 auto ValAPInt = APIntFromVal(Val);
3356 if (ValAPInt.isSignedIntN(32))
3357 ValInt = ValAPInt.getSExtValue();
3358 }
3361 Int.push_back(ValInt);
3362 isl::constraint C = isl::constraint::alloc_equality(
3363 isl::local_space(Transform.get_space()));
3364 C = C.set_coefficient_si(isl::dim::out, i, ValInt);
3365 C = C.set_coefficient_si(isl::dim::in, i, -1);
3366 Transform = Transform.add_constraint(C);
3367 continue;
3368 }
3370 isl::basic_set ZeroSet = isl::basic_set(DimHull);
3371 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
3373 int ValInt = 1;
3374 if (ZeroSet.is_equal(DimHull)) {
3375 ValInt = 0;
3376 }
3378 Int.push_back(ValInt);
3379 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
3380 }
3382 isl::set MappedElements = isl::map(Transform).domain();
3383 if (!Elements.is_subset(MappedElements))
3384 continue;
3386 bool CanFold = true;
3387 if (Int[0] <= 1)
3388 CanFold = false;
3390 unsigned NumDims = Array->getNumberOfDimensions();
3391 for (unsigned i = 1; i < NumDims - 1; i++)
3392 if (Int[0] != Int[i] && Int[i])
3393 CanFold = false;
3395 if (!CanFold)
3396 continue;
3398 for (auto &Access : AccessFunctions)
3399 if (Access->getScopArrayInfo() == Array)
3400 Access->setAccessRelation(
3401 Access->getAccessRelation().apply_range(Transform));
3403 std::vector<const SCEV *> Sizes;
3404 for (unsigned i = 0; i < NumDims; i++) {
3405 auto Size = Array->getDimensionSize(i);
3407 if (i == NumDims - 1)
3408 Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3409 Sizes.push_back(Size);
3412 Array->updateSizes(Sizes, false /* CheckConsistency */);
3413 }
3414 }
3416 void Scop::markFortranArrays() {
3417 for (ScopStmt &Stmt : Stmts) {
3418 for (MemoryAccess *MemAcc : Stmt) {
3419 Value *FAD = MemAcc->getFortranArrayDescriptor();
3420 if (!FAD)
3421 continue;
3423 // TODO: const_cast-ing to edit
3424 ScopArrayInfo *SAI =
3425 const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3426 assert(SAI && "memory access into a Fortran array does not "
3427 "have an associated ScopArrayInfo");
3428 SAI->applyAndSetFAD(FAD);
3429 }
3430 }
3431 }
3433 void Scop::finalizeAccesses() {
3434 updateAccessDimensionality();
3435 foldSizeConstantsToRight();
3436 foldAccessRelations();
3437 assumeNoOutOfBounds();
3438 markFortranArrays();
3439 }
3441 void Scop::updateAccessDimensionality() {
3442 // Check all array accesses for each base pointer and find a (virtual) element
3443 // size for the base pointer that divides all access functions.
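// Example (illustrative): for an array of 8-byte elements that is also
// accessed through a subscript only divisible by 4, the (virtual) element
// type becomes a 4-byte integer, so every access can be expressed as a
// multiple of the virtual element size.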
3444 for (ScopStmt &Stmt : *this)
3445 for (MemoryAccess *Access : Stmt) {
3446 if (!Access->isArrayKind())
3447 continue;
3448 ScopArrayInfo *Array =
3449 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3451 if (Array->getNumberOfDimensions() != 1)
3452 continue;
3453 unsigned DivisibleSize = Array->getElemSizeInBytes();
3454 const SCEV *Subscript = Access->getSubscript(0);
3455 while (!isDivisible(Subscript, DivisibleSize, *SE))
3456 DivisibleSize /= 2;
3457 auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3458 Array->updateElementType(Ty);
3459 }
3461 for (auto &Stmt : *this)
3462 for (auto &Access : Stmt)
3463 Access->updateDimensionality();
3464 }
3466 void Scop::foldAccessRelations() {
3467 for (auto &Stmt : *this)
3468 for (auto &Access : Stmt)
3469 Access->foldAccessRelation();
3470 }
3472 void Scop::assumeNoOutOfBounds() {
3473 for (auto &Stmt : *this)
3474 for (auto &Access : Stmt)
3475 Access->assumeNoOutOfBound();
3476 }
3478 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3479 for (Instruction *Inst : Stmt.getInstructions())
3480 InstStmtMap.erase(Inst);
3482 if (Stmt.isRegionStmt()) {
3483 for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
3484 StmtMap.erase(BB);
3485 // Skip entry basic block, as its instructions are already deleted as
3486 // part of the statement's instruction list.
3487 if (BB == Stmt.getEntryBlock())
3488 continue;
3489 for (Instruction &Inst : *BB)
3490 InstStmtMap.erase(&Inst);
3491 }
3492 } else {
3493 auto StmtMapIt = StmtMap.find(Stmt.getBasicBlock());
3494 if (StmtMapIt != StmtMap.end())
3495 StmtMapIt->second.erase(std::remove(StmtMapIt->second.begin(),
3496 StmtMapIt->second.end(), &Stmt),
3497 StmtMapIt->second.end());
3498 for (Instruction *Inst : Stmt.getInstructions())
3499 InstStmtMap.erase(Inst);
3500 }
3501 }
3503 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete,
3504 bool AfterHoisting) {
3505 for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3506 if (!ShouldDelete(*StmtIt)) {
3507 StmtIt++;
3508 continue;
3509 }
3511 // Start with removing all of the statement's accesses including erasing it
3512 // from all maps that are pointing to them.
3513 // Make a temporary copy because removing MAs invalidates the iterator.
3514 SmallVector<MemoryAccess *, 16> MAList(StmtIt->begin(), StmtIt->end());
3515 for (MemoryAccess *MA : MAList)
3516 StmtIt->removeSingleMemoryAccess(MA, AfterHoisting);
3518 removeFromStmtMap(*StmtIt);
3519 StmtIt = Stmts.erase(StmtIt);
3520 }
3521 }
3523 void Scop::removeStmtNotInDomainMap() {
3524 auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3525 isl::set Domain = DomainMap.lookup(Stmt.getEntryBlock());
3526 if (!Domain)
3527 return true;
3528 return Domain.is_empty();
3529 };
3530 removeStmts(ShouldDelete, false);
3531 }
3533 void Scop::simplifySCoP(bool AfterHoisting) {
3534 auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3535 // Never delete statements that contain calls to debug functions.
3536 if (hasDebugCall(&Stmt))
3537 return false;
3539 bool RemoveStmt = Stmt.isEmpty();
3541 // Remove read only statements only after invariant load hoisting.
3542 if (!RemoveStmt && AfterHoisting) {
3543 bool OnlyRead = true;
3544 for (MemoryAccess *MA : Stmt) {
3545 if (MA->isRead())
3546 continue;
3548 OnlyRead = false;
3549 break;
3550 }
3552 RemoveStmt = OnlyRead;
3553 }
3554 return RemoveStmt;
3555 };
3557 removeStmts(ShouldDelete, AfterHoisting);
3558 }
3560 InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3561 LoadInst *LInst = dyn_cast<LoadInst>(Val);
3562 if (!LInst)
3563 return nullptr;
3565 if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3566 LInst = cast<LoadInst>(Rep);
3568 Type *Ty = LInst->getType();
3569 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3570 for (auto &IAClass : InvariantEquivClasses) {
3571 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3572 continue;
3574 auto &MAs = IAClass.InvariantAccesses;
3575 for (auto *MA : MAs)
3576 if (MA->getAccessInstruction() == Val)
3577 return &IAClass;
3578 }
3580 return nullptr;
3581 }
3583 bool isAParameter(llvm::Value *maybeParam, const Function &F) {
3584 for (const llvm::Argument &Arg : F.args())
3585 if (&Arg == maybeParam)
3586 return true;
3588 return false;
3589 }
3591 bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3592 bool MAInvalidCtxIsEmpty,
3593 bool NonHoistableCtxIsEmpty) {
3594 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3595 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
3596 if (PollyAllowDereferenceOfAllFunctionParams &&
3597 isAParameter(LInst->getPointerOperand(), getFunction()))
3598 return true;
3600 // TODO: We can provide more information for better but more expensive
3601 // results.
3602 if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3603 LInst->getAlignment(), DL))
3604 return false;
3606 // If the location might be overwritten we do not hoist it unconditionally.
3608 // TODO: This is probably too conservative.
3609 if (!NonHoistableCtxIsEmpty)
3610 return false;
3612 // If a dereferenceable load is in a statement that is modeled precisely we
3613 // can hoist it.
3614 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3615 return true;
3617 // Even if the statement is not modeled precisely we can hoist the load if it
3618 // does not involve any parameters that might have been specialized by the
3619 // statement domain.
3620 for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3621 if (!isa<SCEVConstant>(MA->getSubscript(u)))
3622 return false;
3623 return true;
3624 }
3626 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3627 if (InvMAs.empty())
3628 return;
3630 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3631 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3633 // Get the context under which the statement is executed but remove the error
3634 // context under which this statement is reached.
3635 isl::set DomainCtx = Stmt.getDomain().params();
3636 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
3638 if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
3639 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3640 invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3641 return;
3642 }
3644 // Project out all parameters that relate to loads in the statement. Otherwise
3645 // we could have cyclic dependences on the constraints under which the
3646 // hoisted loads are executed and we could not determine an order in which to
3647 // pre-load them. This happens because not only lower bounds are part of the
3648 // domain but also upper bounds.
3649 for (auto &InvMA : InvMAs) {
3650 auto *MA = InvMA.MA;
3651 Instruction *AccInst = MA->getAccessInstruction();
3652 if (SE->isSCEVable(AccInst->getType())) {
3653 SetVector<Value *> Values;
3654 for (const SCEV *Parameter : Parameters) {
3655 Values.clear();
3656 findValues(Parameter, *SE, Values);
3657 if (!Values.count(AccInst))
3658 continue;
3660 if (isl::id ParamId = getIdForParam(Parameter)) {
3661 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3662 if (Dim >= 0)
3663 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3664 }
3665 }
3666 }
3667 }
3669 for (auto &InvMA : InvMAs) {
3670 auto *MA = InvMA.MA;
3671 isl::set NHCtx = InvMA.NonHoistableCtx;
3673 // Check for another invariant access that accesses the same location as
3674 // MA and if found consolidate them. Otherwise create a new equivalence
3675 // class at the end of InvariantEquivClasses.
3676 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3677 Type *Ty = LInst->getType();
3678 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3680 isl::set MAInvalidCtx = MA->getInvalidContext();
3681 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3682 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3684 isl::set MACtx;
3685 // Check if we know that this pointer can be speculatively accessed.
3686 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3687 NonHoistableCtxIsEmpty)) {
3688 MACtx = isl::set::universe(DomainCtx.get_space());
3689 } else {
3690 MACtx = DomainCtx;
3691 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3692 MACtx = MACtx.gist_params(getContext());
3693 }
3695 bool Consolidated = false;
3696 for (auto &IAClass : InvariantEquivClasses) {
3697 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3698 continue;
3700 // If the pointer and the type are equal, check if the access function wrt.
3701 // the domain is equal too. It can happen that the domain fixes
3702 // parameter values and these can be different for distinct parts of the
3703 // SCoP. If this happens we cannot consolidate the loads but need to
3704 // create a new invariant load equivalence class.
3705 auto &MAs = IAClass.InvariantAccesses;
3706 if (!MAs.empty()) {
3707 auto *LastMA = MAs.front();
3709 isl::set AR = MA->getAccessRelation().range();
3710 isl::set LastAR = LastMA->getAccessRelation().range();
3711 bool SameAR = AR.is_equal(LastAR);
3713 if (!SameAR)
3714 continue;
3715 }
3717 // Add MA to the list of accesses that are in this class.
3718 MAs.push_front(MA);
3720 Consolidated = true;
3722 // Unify the execution context of the class and this statement.
3723 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3724 if (IAClassDomainCtx)
3725 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3726 else
3727 IAClassDomainCtx = MACtx;
3728 IAClass.ExecutionContext = IAClassDomainCtx;
3729 break;
3730 }
3732 if (Consolidated)
3733 continue;
3735 MACtx = MACtx.coalesce();
3737 // If we did not consolidate MA, thus did not find an equivalence class
3738 // for it, we create a new one.
3739 InvariantEquivClasses.emplace_back(
3740 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3741 }
3742 }
3744 /// Check if an access range is too complex.
3746 /// An access range is too complex, if it contains either many disjuncts or
3747 /// very complex expressions. As a simple heuristic, we assume a set to
3748 /// be too complex if the sum of existentially quantified dimensions and
3749 /// set dimensions is larger than a threshold. This reliably detects both
3750 /// sets with many disjuncts as well as sets with many divisions as they
3751 /// arise in h264.
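///
/// Example (illustrative): with a threshold of 8, a range such as
///   { [i] : exists e0, ..., e7 : constraints involving e0 ... e7 }
/// is rejected: one set dimension plus eight existentially quantified
/// dimensions already exceeds the threshold.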
3753 /// @param AccessRange The range to check for complexity.
3755 /// @returns True if the access range is too complex.
3756 static bool isAccessRangeTooComplex(isl::set AccessRange) {
3757 unsigned NumTotalDims = 0;
3759 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
3760 NumTotalDims += BSet.dim(isl::dim::div);
3761 NumTotalDims += BSet.dim(isl::dim::set);
3762 }
3764 if (NumTotalDims > MaxDimensionsInAccessRange)
3765 return true;
3767 return false;
3768 }
3770 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
3771 // TODO: Loads that are not loop carried, hence are in a statement with
3772 // zero iterators, are by construction invariant, though we
3773 // currently "hoist" them anyway. This is necessary because we allow
3774 // them to be treated as parameters (e.g., in conditions) and our code
3775 // generation would otherwise use the old value.
3777 auto &Stmt = *Access->getStatement();
3778 BasicBlock *BB = Stmt.getEntryBlock();
3780 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
3781 Access->isMemoryIntrinsic())
3782 return nullptr;
3784 // Skip accesses that have an invariant base pointer which is defined but
3785 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
3786 // returns a pointer that is used as a base address. However, as we want
3787 // to hoist indirect pointers, we allow the base pointer to be defined in
3788 // the region if it is also a memory access. Each ScopArrayInfo object
3789 // that has a base pointer origin has a base pointer that is loaded and
3790 // invariant, thus it will be hoisted too. However, if there is
3791 // no base pointer origin we check that the base pointer is defined
3792 // outside the region.
3793 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
3794 if (hasNonHoistableBasePtrInScop(Access, Writes))
3795 return nullptr;
3797 isl::map AccessRelation = Access->getAccessRelation();
3798 assert(!AccessRelation.is_empty());
3800 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
3801 return nullptr;
3803 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
3804 isl::set SafeToLoad;
3806 auto &DL = getFunction().getParent()->getDataLayout();
3807 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
3808 DL)) {
3809 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
3810 } else if (BB != LI->getParent()) {
3811 // Skip accesses in non-affine subregions as they might not be executed
3812 // under the same condition as the entry of the non-affine subregion.
3813 return nullptr;
3814 } else {
3815 SafeToLoad = AccessRelation.range();
3816 }
3818 if (isAccessRangeTooComplex(AccessRelation.range()))
3819 return nullptr;
3821 isl::union_map Written = Writes.intersect_range(SafeToLoad);
3822 isl::set WrittenCtx = Written.params();
3823 bool IsWritten = !WrittenCtx.is_empty();
3825 if (!IsWritten)
3826 return WrittenCtx;
3828 WrittenCtx = WrittenCtx.remove_divs();
3829 bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
3830 if (TooComplex || !isRequiredInvariantLoad(LI))
3831 return nullptr;
3833 addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), AS_RESTRICTION,
3834 LI->getParent());
3835 return WrittenCtx;
3836 }
3838 void Scop::verifyInvariantLoads() {
3839 auto &RIL = getRequiredInvariantLoads();
3840 for (LoadInst *LI : RIL) {
3841 assert(LI && contains(LI));
3842 // If there exists a statement in the scop which has a memory access for
3843 // @p LI, then mark this scop as infeasible for optimization.
3844 for (ScopStmt &Stmt : Stmts)
3845 if (Stmt.getArrayAccessOrNULLFor(LI)) {
3846 invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
3847 return;
3848 }
3849 }
3850 }
3852 void Scop::hoistInvariantLoads() {
3853 if (!PollyInvariantLoadHoisting)
3854 return;
3856 isl::union_map Writes = getWrites();
3857 for (ScopStmt &Stmt : *this) {
3858 InvariantAccessesTy InvariantAccesses;
3860 for (MemoryAccess *Access : Stmt)
3861 if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
3862 InvariantAccesses.push_back({Access, NHCtx});
3864 // Transfer the memory access from the statement to the SCoP.
3865 for (auto InvMA : InvariantAccesses)
3866 Stmt.removeMemoryAccess(InvMA.MA);
3867 addInvariantLoads(Stmt, InvariantAccesses);
3868 }
3869 }
3871 /// Find the canonical scop array info object for a set of invariant load
3872 /// hoisted loads. The canonical array is the one that corresponds to the
3873 /// first load in the list of accesses that is used as the base pointer of a
3874 /// scop array.
3875 static const ScopArrayInfo *findCanonicalArray(Scop *S,
3876 MemoryAccessList &Accesses) {
3877 for (MemoryAccess *Access : Accesses) {
3878 const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
3879 Access->getAccessInstruction(), MemoryKind::Array);
3880 if (CanonicalArray)
3881 return CanonicalArray;
3882 }
3883 return nullptr;
3884 }
3886 /// Check if @p Array serves as the base array in an invariant load.
3887 static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
3888 for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
3889 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3890 if (Access2->getScopArrayInfo() == Array)
3891 return true;
3892 return false;
3893 }
3895 /// Replace the base pointer arrays in all memory accesses referencing @p Old
3896 /// with a reference to @p New.
3897 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
3898 const ScopArrayInfo *New) {
3899 for (ScopStmt &Stmt : *S)
3900 for (MemoryAccess *Access : Stmt) {
3901 if (Access->getLatestScopArrayInfo() != Old)
3902 continue;
3904 isl::id Id = New->getBasePtrId();
3905 isl::map Map = Access->getAccessRelation();
3906 Map = Map.set_tuple_id(isl::dim::out, Id);
3907 Access->setAccessRelation(Map);
3908 }
3909 }
3911 void Scop::canonicalizeDynamicBasePtrs() {
3912 for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
3913 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3915 const ScopArrayInfo *CanonicalBasePtrSAI =
3916 findCanonicalArray(this, BasePtrAccesses);
3918 if (!CanonicalBasePtrSAI)
3919 continue;
3921 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3922 const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
3923 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3924 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3925 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3926 continue;
3928 // We currently do not canonicalize arrays where some accesses are
3929 // hoisted as invariant loads. If we would, we need to update the access
3930 // function of the invariant loads as well. However, as this is not a
3931 // very common situation, we leave this for now to avoid further
3932 // complexity increases.
3933 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
3934 continue;
3936 replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
3937 }
3938 }
3939 }
3941 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
3942 ArrayRef<const SCEV *> Sizes,
3943 MemoryKind Kind,
3944 const char *BaseName) {
3945 assert((BasePtr || BaseName) &&
3946 "BasePtr and BaseName can not be nullptr at the same time.");
3947 assert(!(BasePtr && BaseName) && "BaseName is redundant.");
3948 auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
3949 : ScopArrayNameMap[BaseName];
3950 if (!SAI) {
3951 auto &DL = getFunction().getParent()->getDataLayout();
3952 SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
3953 DL, this, BaseName));
3954 ScopArrayInfoSet.insert(SAI.get());
3955 } else {
3956 SAI->updateElementType(ElementType);
3957 // In case of mismatching array sizes, we bail out by setting the run-time
3958 // context to false.
3959 if (!SAI->updateSizes(Sizes))
3960 invalidate(DELINEARIZATION, DebugLoc());
3961 }
3962 return SAI.get();
3963 }
3965 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
3966 const std::string &BaseName,
3967 const std::vector<unsigned> &Sizes) {
3968 auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
3969 std::vector<const SCEV *> SCEVSizes;
3971 for (auto size : Sizes)
3972 if (size)
3973 SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
3974 else
3975 SCEVSizes.push_back(nullptr);
3977 auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
3978 MemoryKind::Array, BaseName.c_str());
3979 return SAI;
3980 }
3982 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
3983 MemoryKind Kind) {
3984 auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
3985 return SAI;
3986 }
3988 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
3989 auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
3990 assert(SAI && "No ScopArrayInfo available for this base pointer");
3991 return SAI;
3992 }
3994 std::string Scop::getContextStr() const { return getContext().to_str(); }
3996 std::string Scop::getAssumedContextStr() const {
3997 assert(AssumedContext && "Assumed context not yet built");
3998 return AssumedContext.to_str();
3999 }
4001 std::string Scop::getInvalidContextStr() const {
4002 return InvalidContext.to_str();
4003 }
4005 std::string Scop::getNameStr() const {
4006 std::string ExitName, EntryName;
4007 std::tie(EntryName, ExitName) = getEntryExitStr();
4008 return EntryName + "---" + ExitName;
4009 }
4011 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4012 std::string ExitName, EntryName;
4013 raw_string_ostream ExitStr(ExitName);
4014 raw_string_ostream EntryStr(EntryName);
4016 R.getEntry()->printAsOperand(EntryStr, false);
4017 EntryStr.str();
4019 if (R.getExit()) {
4020 R.getExit()->printAsOperand(ExitStr, false);
4021 ExitStr.str();
4022 } else
4023 ExitName = "FunctionExit";
4025 return std::make_pair(EntryName, ExitName);
4026 }
4028 isl::set Scop::getContext() const { return Context; }
4029 isl::space Scop::getParamSpace() const { return getContext().get_space(); }
4031 isl::space Scop::getFullParamSpace() const {
4032 std::vector<isl::id> FortranIDs;
4033 FortranIDs = getFortranArrayIds(arrays());
4035 isl::space Space = isl::space::params_alloc(
4036 getIslCtx(), ParameterIds.size() + FortranIDs.size());
4038 unsigned PDim = 0;
4039 for (const SCEV *Parameter : Parameters) {
4040 isl::id Id = getIdForParam(Parameter);
4041 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4042 }
4044 for (isl::id Id : FortranIDs)
4045 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4047 return Space;
4048 }
4050 isl::set Scop::getAssumedContext() const {
4051 assert(AssumedContext && "Assumed context not yet built");
4052 return AssumedContext;
4053 }
4055 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4056 if (PollyProcessUnprofitable)
4057 return true;
4059 if (isEmpty())
4060 return false;
4062 unsigned OptimizableStmtsOrLoops = 0;
4063 for (auto &Stmt : *this) {
4064 if (Stmt.getNumIterators() == 0)
4065 continue;
4067 bool ContainsArrayAccs = false;
4068 bool ContainsScalarAccs = false;
4069 for (auto *MA : Stmt) {
4070 if (MA->isRead())
4071 continue;
4072 ContainsArrayAccs |= MA->isLatestArrayKind();
4073 ContainsScalarAccs |= MA->isLatestScalarKind();
4074 }
4076 if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4077 OptimizableStmtsOrLoops += Stmt.getNumIterators();
4078 }
4080 return OptimizableStmtsOrLoops > 1;
4081 }
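// Example (illustrative): with assumed context { [n] : n <= 100 } and
// invalid context { [n] : n >= 1000 }, the runtime context is feasible,
// since e.g. n = 50 satisfies all assumptions without entering the
// invalid set.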
4083 bool Scop::hasFeasibleRuntimeContext() const {
4084 auto PositiveContext = getAssumedContext();
4085 auto NegativeContext = getInvalidContext();
4086 PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
4087 // addNonEmptyDomainConstraints returns null if ScopStmts have a null domain
4088 if (!PositiveContext)
4089 return false;
4091 bool IsFeasible = !(PositiveContext.is_empty() ||
4092 PositiveContext.is_subset(NegativeContext));
4093 if (!IsFeasible)
4094 return false;
4096 auto DomainContext = getDomains().params();
4097 IsFeasible = !DomainContext.is_subset(NegativeContext);
4098 IsFeasible &= !Context.is_subset(NegativeContext);
4100 return IsFeasible;
4101 }
4103 static std::string toString(AssumptionKind Kind) {
4104 switch (Kind) {
4105 case ALIASING:
4106 return "No-aliasing";
4107 case INBOUNDS:
4108 return "Inbounds";
4109 case WRAPPING:
4110 return "No-overflows";
4111 case UNSIGNED:
4112 return "Signed-unsigned";
4113 case COMPLEXITY:
4114 return "Low complexity";
4115 case PROFITABLE:
4116 return "Profitable";
4117 case ERRORBLOCK:
4118 return "No-error";
4119 case INFINITELOOP:
4120 return "Finite loop";
4121 case INVARIANTLOAD:
4122 return "Invariant load";
4123 case DELINEARIZATION:
4124 return "Delinearization";
4125 }
4126 llvm_unreachable("Unknown AssumptionKind!");
4127 }
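// Example (illustrative): given the context { [n] : n > 0 }, the assumption
// { [n] : n > -10 } is already implied and therefore not effective, while
// the restriction { [n] : n > 100 } intersects the context and is kept.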
4129 bool Scop::isEffectiveAssumption(isl::set Set, AssumptionSign Sign) {
4130 if (Sign == AS_ASSUMPTION) {
4131 if (Context.is_subset(Set))
4132 return false;
4134 if (AssumedContext.is_subset(Set))
4135 return false;
4136 } else {
4137 if (Set.is_disjoint(Context))
4138 return false;
4140 if (Set.is_subset(InvalidContext))
4141 return false;
4142 }
4143 return true;
4144 }
4146 bool Scop::trackAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
4147 AssumptionSign Sign, BasicBlock *BB) {
4148 if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
4149 return false;
4151 // Never emit trivial assumptions as they only clutter the output.
4152 if (!PollyRemarksMinimal) {
4153 isl::set Univ;
4154 if (Sign == AS_ASSUMPTION)
4155 Univ = isl::set::universe(Set.get_space());
4157 bool IsTrivial = (Sign == AS_RESTRICTION && Set.is_empty()) ||
4158 (Sign == AS_ASSUMPTION && Univ.is_equal(Set));
4160 if (IsTrivial)
4161 return false;
4162 }
4164 switch (Kind) {
4165 case ALIASING:
4166 AssumptionsAliasing++;
4167 break;
4168 case INBOUNDS:
4169 AssumptionsInbounds++;
4170 break;
4171 case WRAPPING:
4172 AssumptionsWrapping++;
4173 break;
4174 case UNSIGNED:
4175 AssumptionsUnsigned++;
4176 break;
4177 case COMPLEXITY:
4178 AssumptionsComplexity++;
4179 break;
4180 case PROFITABLE:
4181 AssumptionsUnprofitable++;
4182 break;
4183 case ERRORBLOCK:
4184 AssumptionsErrorBlock++;
4185 break;
4186 case INFINITELOOP:
4187 AssumptionsInfiniteLoop++;
4188 break;
4189 case INVARIANTLOAD:
4190 AssumptionsInvariantLoad++;
4191 break;
4192 case DELINEARIZATION:
4193 AssumptionsDelinearization++;
4194 break;
4195 }
4197 auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4198 std::string Msg = toString(Kind) + Suffix + Set.to_str();
4199 if (BB)
4200 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
4201 << Msg);
4202 else
4203 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
4204 R.getEntry())
4205 << Msg);
4206 return true;
4207 }
4209 void Scop::addAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
4210 AssumptionSign Sign, BasicBlock *BB) {
4211 // Simplify the assumptions/restrictions first.
4212 Set = Set.gist_params(getContext());
4214 if (!trackAssumption(Kind, Set, Loc, Sign, BB))
4215 return;
4217 if (Sign == AS_ASSUMPTION)
4218 AssumedContext = AssumedContext.intersect(Set).coalesce();
4219 else
4220 InvalidContext = InvalidContext.unite(Set).coalesce();
4221 }
4223 void Scop::recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
4224 AssumptionSign Sign, BasicBlock *BB) {
4225 assert((Set.is_params() || BB) &&
4226 "Assumptions without a basic block must be parameter sets");
4227 RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
4228 }
4230 void Scop::addRecordedAssumptions() {
4231 while (!RecordedAssumptions.empty()) {
4232 Assumption AS = RecordedAssumptions.pop_back_val();
4234 if (!AS.BB) {
4235 addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
4236 continue;
4237 }
4239 // If the domain was deleted the assumptions are void.
4240 isl_set *Dom = getDomainConditions(AS.BB).release();
4241 if (!Dom)
4242 continue;
4244 // If a basic block was given use its domain to simplify the assumption.
4245 // In case of restrictions we know they only have to hold on the domain,
4246 // thus we can intersect them with the domain of the block. However, for
4247 // assumptions the domain has to imply them, thus:
4248 //
4249 //   Dom => S   <==>   !Dom v S   <==>   !(Dom - S)
4251 // To avoid the complement we will register Dom - S as a restriction, not an
4252 // assumption.
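// Example (illustrative): for Dom = { [n] : n >= 0 } and an assumed set
// S = { [n] : n <= 100 }, we register Dom - S = { [n] : n > 100 } as a
// restriction, ruling out exactly the parameter values for which the block
// executes but the assumption does not hold.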
4253 isl_set *S = AS.Set.copy();
4254 if (AS.Sign == AS_RESTRICTION)
4255 S = isl_set_params(isl_set_intersect(S, Dom));
4256 else /* (AS.Sign == AS_ASSUMPTION) */
4257 S = isl_set_params(isl_set_subtract(Dom, S));
4259 addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
4260 }
4261 }
4263 void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
4264 LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
4265 addAssumption(Kind, isl::set::empty(getParamSpace()), Loc, AS_ASSUMPTION, BB);
4266 }
4268 isl::set Scop::getInvalidContext() const { return InvalidContext; }
4270 void Scop::printContext(raw_ostream &OS) const {
4271 OS << "Context:\n";
4272 OS.indent(4) << Context << "\n";
4274 OS.indent(4) << "Assumed Context:\n";
4275 OS.indent(4) << AssumedContext << "\n";
4277 OS.indent(4) << "Invalid Context:\n";
4278 OS.indent(4) << InvalidContext << "\n";
4280 unsigned Dim = 0;
4281 for (const SCEV *Parameter : Parameters)
4282 OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4283 }
4285 void Scop::printAliasAssumptions(raw_ostream &OS) const {
4286 int noOfGroups = 0;
4287 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4288 if (Pair.second.size() == 0)
4289 noOfGroups += 1;
4290 else
4291 noOfGroups += Pair.second.size();
4292 }
4294 OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4295 if (MinMaxAliasGroups.empty()) {
4296 OS.indent(8) << "n/a\n";
4297 return;
4298 }
4300 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4302 // If the group has no read only accesses print the write accesses.
4303 if (Pair.second.empty()) {
4304 OS.indent(8) << "[[";
4305 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4306 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4307 << ">";
4308 }
4309 OS << " ]]\n";
4310 }
4312 for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4313 OS.indent(8) << "[[";
4314 OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4315 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4316 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4317 << ">";
4319 OS << " ]]\n";
4320 }
4321 }
4322 }
4324 void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
4325 OS << "Statements {\n";
4327 for (const ScopStmt &Stmt : *this) {
4328 OS.indent(4);
4329 Stmt.print(OS, PrintInstructions);
4330 }
4332 OS.indent(4) << "}\n";
4333 }
4335 void Scop::printArrayInfo(raw_ostream &OS) const {
4336 OS << "Arrays {\n";
4338 for (auto &Array : arrays())
4339 Array->print(OS);
4341 OS.indent(4) << "}\n";
4343 OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4345 for (auto &Array : arrays())
4346 Array->print(OS, /* SizeAsPwAff */ true);
4348 OS.indent(4) << "}\n";
4349 }
4351 void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
4352 OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4353 OS.indent(4) << "Region: " << getNameStr() << "\n";
4354 OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4355 OS.indent(4) << "Invariant Accesses: {\n";
4356 for (const auto &IAClass : InvariantEquivClasses) {
4357 const auto &MAs = IAClass.InvariantAccesses;
4358 if (MAs.empty()) {
4359 OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4360 } else {
4361 MAs.front()->print(OS);
4362 OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4363 << "\n";
4364 }
4365 }
4366 OS.indent(4) << "}\n";
4367 printContext(OS.indent(4));
4368 printArrayInfo(OS.indent(4));
4369 printAliasAssumptions(OS);
4370 printStatements(OS.indent(4), PrintInstructions);
4371 }
4373 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4374 LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
4375 #endif
4377 isl::ctx Scop::getIslCtx() const { return IslCtx.get(); }
4379 __isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
4380 bool NonNegative) {
4381 // First try to use the SCEVAffinator to generate a piecewise defined
4382 // affine function from @p E in the context of @p BB. If that task becomes too
4383 // complex the affinator might return a nullptr. In such a case we invalidate
4384 // the SCoP and return a dummy value. This way we do not need to add error
4385 // handling code to all users of this function.
4386 auto PWAC = Affinator.getPwAff(E, BB);
4387 if (PWAC.first) {
4388 // TODO: We could use a heuristic and either use:
4389 // SCEVAffinator::takeNonNegativeAssumption
4390 // or
4391 // SCEVAffinator::interpretAsUnsigned
4392 // to deal with unsigned or "NonNegative" SCEVs.
4393 if (NonNegative)
4394 Affinator.takeNonNegativeAssumption(PWAC);
4395 return PWAC;
4396 }
4398 auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
4399 invalidate(COMPLEXITY, DL, BB);
4400 return Affinator.getPwAff(SE->getZero(E->getType()), BB);
4401 }
4403 isl::union_set Scop::getDomains() const {
4404 isl_space *EmptySpace = isl_space_params_alloc(getIslCtx().get(), 0);
4405 isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4407 for (const ScopStmt &Stmt : *this)
4408 Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());
4410 return isl::manage(Domain);
4411 }
4413 isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4414 PWACtx PWAC = getPwAff(E, BB);
4415 return PWAC.first;
4416 }
4418 isl::union_map
4419 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4420 isl::union_map Accesses = isl::union_map::empty(getParamSpace());
4422 for (ScopStmt &Stmt : *this) {
4423 for (MemoryAccess *MA : Stmt) {
4424 if (!Predicate(*MA))
4425 continue;
4427 isl::set Domain = Stmt.getDomain();
4428 isl::map AccessDomain = MA->getAccessRelation();
4429 AccessDomain = AccessDomain.intersect_domain(Domain);
4430 Accesses = Accesses.add_map(AccessDomain);
4431 }
4432 }
4434 return Accesses.coalesce();
4435 }
4437 isl::union_map Scop::getMustWrites() {
4438 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4439 }
4441 isl::union_map Scop::getMayWrites() {
4442 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4443 }
4445 isl::union_map Scop::getWrites() {
4446 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4447 }
4449 isl::union_map Scop::getReads() {
4450 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4451 }
4453 isl::union_map Scop::getAccesses() {
4454 return getAccessesOfType([](MemoryAccess &MA) { return true; });
4455 }
4457 isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
4458 return getAccessesOfType(
4459 [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
4460 }
4462 // Check whether @p Node is an extension node.
4464 // @return true if @p Node is an extension node.
4465 isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
4466 if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
4467 return isl_bool_error;
4468 else
4469 return isl_bool_true;
4470 }
4472 bool Scop::containsExtensionNode(isl::schedule Schedule) {
4473 return isl_schedule_foreach_schedule_node_top_down(
4474 Schedule.get(), isNotExtNode, nullptr) == isl_stat_error;
4475 }
4477 isl::union_map Scop::getSchedule() const {
4478 auto Tree = getScheduleTree();
4479 if (containsExtensionNode(Tree))
4480 return nullptr;
4482 return Tree.get_map();
4483 }
4485 isl::schedule Scop::getScheduleTree() const {
4486 return Schedule.intersect_domain(getDomains());
4487 }
4489 void Scop::setSchedule(isl::union_map NewSchedule) {
4490 auto S = isl::schedule::from_domain(getDomains());
4491 Schedule = S.insert_partial_schedule(
4492 isl::multi_union_pw_aff::from_union_map(NewSchedule));
4493 ScheduleModified = true;
4494 }
4496 void Scop::setScheduleTree(isl::schedule NewSchedule) {
4497 Schedule = NewSchedule;
4498 ScheduleModified = true;
4499 }
4501 bool Scop::restrictDomains(isl::union_set Domain) {
4502 bool Changed = false;
4503 for (ScopStmt &Stmt : *this) {
4504 isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
4505 isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);
4507 if (StmtDomain.is_subset(NewStmtDomain))
4508 continue;
4510 Changed = true;
4512 NewStmtDomain = NewStmtDomain.coalesce();
4514 if (NewStmtDomain.is_empty())
4515 Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
4516 else
4517 Stmt.restrictDomain(isl::set(NewStmtDomain));
4518 }
4519 return Changed;
4520 }
4522 ScalarEvolution *Scop::getSE() const { return SE; }
4524 // Create an isl_multi_union_aff that defines an identity mapping from the
4525 // elements of USet to their N-th dimension.
4527 // # Example:
4529 // Domain: { A[i,j]; B[i,j,k] }
4530 // N: 1
4532 // Resulting Mapping: { A[i,j] -> [(i)]; B[i,j,k] -> [(i)] }
4534 // @param USet A union set describing the elements for which to generate a
4535 // mapping.
4536 // @param N The dimension to map to.
4537 // @returns A mapping from USet to its N-th dimension.
4538 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
4539 assert(N >= 0);
4540 assert(USet);
4541 assert(!USet.is_empty());
4543 auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
4545 for (isl::set S : USet.get_set_list()) {
4546 int Dim = S.dim(isl::dim::set);
4547 auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
4548 N, Dim - N);
4549 if (N > 1)
4550 PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
4552 Result = Result.add_pw_multi_aff(PMA);
4553 }
4555 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
4556 }
4558 void Scop::addScopStmt(BasicBlock *BB, StringRef Name, Loop *SurroundingLoop,
4559 std::vector<Instruction *> Instructions) {
4560 assert(BB && "Unexpected nullptr!");
4561 Stmts.emplace_back(*this, *BB, Name, SurroundingLoop, Instructions);
4562 auto *Stmt = &Stmts.back();
4563 StmtMap[BB].push_back(Stmt);
4564 for (Instruction *Inst : Instructions) {
4565 assert(!InstStmtMap.count(Inst) &&
4566 "Unexpected statement corresponding to the instruction.");
4567 InstStmtMap[Inst] = Stmt;
4568 }
4569 }
4571 void Scop::addScopStmt(Region *R, StringRef Name, Loop *SurroundingLoop,
4572 std::vector<Instruction *> Instructions) {
4573 assert(R && "Unexpected nullptr!");
4574 Stmts.emplace_back(*this, *R, Name, SurroundingLoop, Instructions);
4575 auto *Stmt = &Stmts.back();
4577 for (Instruction *Inst : Instructions) {
4578 assert(!InstStmtMap.count(Inst) &&
4579 "Unexpected statement corresponding to the instruction.");
4580 InstStmtMap[Inst] = Stmt;
4581 }
4583 for (BasicBlock *BB : R->blocks()) {
4584 StmtMap[BB].push_back(Stmt);
4585 if (BB == R->getEntry())
4586 continue;
4587 for (Instruction &Inst : *BB) {
4588 assert(!InstStmtMap.count(&Inst) &&
4589 "Unexpected statement corresponding to the instruction.");
4590 InstStmtMap[&Inst] = Stmt;
4591 }
4592 }
4593 }
4595 ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
4596 isl::set Domain) {
4597 #ifndef NDEBUG
4598 isl::set SourceDomain = SourceRel.domain();
4599 isl::set TargetDomain = TargetRel.domain();
4600 assert(Domain.is_subset(TargetDomain) &&
4601 "Target access not defined for complete statement domain");
4602 assert(Domain.is_subset(SourceDomain) &&
4603 "Source access not defined for complete statement domain");
4604 #endif
4605 Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4606 CopyStmtsNum++;
4607 return &(Stmts.back());
4608 }
4610 void Scop::buildSchedule(LoopInfo &LI) {
4611 Loop *L = getLoopSurroundingScop(*this, LI);
4612 LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
4613 buildSchedule(getRegion().getNode(), LoopStack, LI);
4614 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
4615 Schedule = LoopStack[0].Schedule;
4616 }
4618 /// To generate a schedule for the elements in a Region we traverse the Region
4619 /// in reverse-post-order and add the contained RegionNodes in traversal order
4620 /// to the schedule of the loop that is currently at the top of the LoopStack.
4621 /// For loop-free codes, this results in a correct sequential ordering.
4623 /// Example:
4624 /// bb1(0)
4625 /// / \.
4626 /// bb2(1) bb3(2)
4627 /// \ / \.
4628 /// bb4(3) bb5(4)
4629 /// \ /
4630 /// bb6(5)
4632 /// Including loops requires additional processing. Whenever a loop header is
4633 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
4634 /// from an empty schedule, we first process all RegionNodes that are within
4635 /// this loop and complete the sequential schedule at this loop-level before
4636 /// processing any other nodes. To implement this
4637 /// loop-nodes-first-processing, the reverse post-order traversal is
4638 /// insufficient. Hence, we additionally check if the traversal yields
4639 /// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4640 /// These region-nodes are then queued and only traversed after all nodes
4641 /// within the current loop have been processed.
4642 void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
4643 Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);
4645 ReversePostOrderTraversal<Region *> RTraversal(R);
4646 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
4647 std::deque<RegionNode *> DelayList;
4648 bool LastRNWaiting = false;
4650 // Iterate over the region @p R in reverse post-order but queue
4651 // sub-regions/blocks iff they are not part of the last encountered but not
4652 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4653 // that we queued the last sub-region/block from the reverse post-order
4654 // iterator. If it is set we have to explore the next sub-region/block from
4655 // the iterator (if any) to guarantee progress. If it is not set we first try
4656 // the next queued sub-region/blocks.
4657 while (!WorkList.empty() || !DelayList.empty()) {
4658 RegionNode *RN;
4660 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
4661 RN = WorkList.front();
4662 WorkList.pop_front();
4663 LastRNWaiting = false;
4664 } else {
4665 RN = DelayList.front();
4666 DelayList.pop_front();
4667 }
4669 Loop *L = getRegionNodeLoop(RN, LI);
4670 if (!contains(L))
4671 L = OuterScopLoop;
4673 Loop *LastLoop = LoopStack.back().L;
4674 if (LastLoop != L) {
4675 if (LastLoop && !LastLoop->contains(L)) {
4676 LastRNWaiting = true;
4677 DelayList.push_back(RN);
4678 continue;
4679 }
4680 LoopStack.push_back({L, nullptr, 0});
4681 }
4682 buildSchedule(RN, LoopStack, LI);
4683 }
4684 }
4686 void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
4687 if (RN->isSubRegion()) {
4688 auto *LocalRegion = RN->getNodeAs<Region>();
4689 if (!isNonAffineSubRegion(LocalRegion)) {
4690 buildSchedule(LocalRegion, LoopStack, LI);
4691 return;
4692 }
4693 }
4695 assert(LoopStack.rbegin() != LoopStack.rend());
4696 auto LoopData = LoopStack.rbegin();
4697 LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
4699 for (auto *Stmt : getStmtListFor(RN)) {
4700 isl::union_set UDomain{Stmt->getDomain()};
4701 auto StmtSchedule = isl::schedule::from_domain(UDomain);
4702 LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
4703 }
4705 // Check if we just processed the last node in this loop. If we did, finalize
4706 // the loop by:
4708 // - adding new schedule dimensions
4709 // - folding the resulting schedule into the parent loop schedule
4710 // - dropping the loop schedule from the LoopStack.
4712 // Then continue to check surrounding loops, which might also have been
4713 // completed by this node.
4714 size_t Dimension = LoopStack.size();
4715 while (LoopData->L &&
4716 LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
4717 isl::schedule Schedule = LoopData->Schedule;
4718 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
4720 assert(std::next(LoopData) != LoopStack.rend());
4721 ++LoopData;
4722 --Dimension;
4724 if (Schedule) {
4725 isl::union_set Domain = Schedule.get_domain();
4726 isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
4727 Schedule = Schedule.insert_partial_schedule(MUPA);
4728 LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
4729 }
4731 LoopData->NumBlocksProcessed += NumBlocksProcessed;
4732 }
4733 // Now pop all loops processed above from the LoopStack.
4734 LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
4735 }
4737 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
4738 auto StmtMapIt = StmtMap.find(BB);
4739 if (StmtMapIt == StmtMap.end())
4740 return {};
4741 return StmtMapIt->second;
4742 }
4744 ScopStmt *Scop::getIncomingStmtFor(const Use &U) const {
4745 auto *PHI = cast<PHINode>(U.getUser());
4746 BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
4748 // If the value is non-synthesizable and defined in the incoming block, use
4749 // the statement that contains it as the user statement.
4750 if (auto *IncomingInst = dyn_cast<Instruction>(U.get())) {
4751 if (IncomingInst->getParent() == IncomingBB) {
4752 if (ScopStmt *IncomingStmt = getStmtFor(IncomingInst))
4753 return IncomingStmt;
4754 }
4755 }
4757 // Otherwise, use the epilogue/last statement.
4758 return getLastStmtFor(IncomingBB);
4759 }
4761 ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
4762 ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
4763 if (!StmtList.empty())
4764 return StmtList.back();
4765 return nullptr;
4766 }
4768 ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
4769 if (RN->isSubRegion())
4770 return getStmtListFor(RN->getNodeAs<Region>());
4771 return getStmtListFor(RN->getNodeAs<BasicBlock>());
4772 }
4774 ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
4775 return getStmtListFor(R->getEntry());
4776 }
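// Example (illustrative): for the outermost loop contained in the SCoP,
// getRelativeLoopDepth returns 0, and for a loop nested once below it, 1,
// regardless of how deeply the SCoP itself is nested in the function.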
4778 int Scop::getRelativeLoopDepth(const Loop *L) const {
4779 if (!L || !R.contains(L))
4780 return -1;
4781 // outermostLoopInRegion always returns nullptr for top level regions
4782 if (R.isTopLevelRegion()) {
4783 // LoopInfo's depths start at 1, we start at 0
4784 return L->getLoopDepth() - 1;
4785 } else {
4786 Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
4787 assert(OuterLoop);
4788 return L->getLoopDepth() - OuterLoop->getLoopDepth();
4789 }
4790 }
4792 ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
4793 for (auto &SAI : arrays()) {
4794 if (SAI->getName() == BaseName)
4795 return SAI;
4796 }
4797 return nullptr;
4798 }
4800 void Scop::addAccessData(MemoryAccess *Access) {
4801 const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
4802 assert(SAI && "can only use after access relations have been constructed");
4804 if (Access->isOriginalValueKind() && Access->isRead())
4805 ValueUseAccs[SAI].push_back(Access);
4806 else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
4807 PHIIncomingAccs[SAI].push_back(Access);
4808 }
4810 void Scop::removeAccessData(MemoryAccess *Access) {
4811 if (Access->isOriginalValueKind() && Access->isWrite()) {
4812 ValueDefAccs.erase(Access->getAccessValue());
4813 } else if (Access->isOriginalValueKind() && Access->isRead()) {
4814 auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
4815 auto NewEnd = std::remove(Uses.begin(), Uses.end(), Access);
4816 Uses.erase(NewEnd, Uses.end());
4817 } else if (Access->isOriginalPHIKind() && Access->isRead()) {
4818 PHINode *PHI = cast<PHINode>(Access->getAccessInstruction());
4819 PHIReadAccs.erase(PHI);
4820 } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
4821 auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
4822 auto NewEnd = std::remove(Incomings.begin(), Incomings.end(), Access);
4823 Incomings.erase(NewEnd, Incomings.end());
4824 }
4825 }
4827 MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
4828 assert(SAI->isValueKind());
4830 Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
4831 if (!Val)
4832 return nullptr;
4834 return ValueDefAccs.lookup(Val);
4835 }
4837 ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
4838 assert(SAI->isValueKind());
4839 auto It = ValueUseAccs.find(SAI);
4840 if (It == ValueUseAccs.end())
4841 return {};
4842 return It->second;
4843 }
MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
  assert(SAI->isPHIKind() || SAI->isExitPHIKind());

  if (SAI->isExitPHIKind())
    return nullptr;

  PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
  return PHIReadAccs.lookup(PHI);
}

ArrayRef<MemoryAccess *> Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
  assert(SAI->isPHIKind() || SAI->isExitPHIKind());
  auto It = PHIIncomingAccs.find(SAI);
  if (It == PHIIncomingAccs.end())
    return {};
  return It->second;
}

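// A value escapes the SCoP if it has a use outside the region, or if it feeds
// a PHI in the region exit that exit simplification will move out of the
// SCoP.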
bool Scop::isEscaping(Instruction *Inst) {
  assert(contains(Inst) && "The concept of escaping only makes sense for "
                           "values defined inside the SCoP");
  for (Use &Use : Inst->uses()) {
    BasicBlock *UserBB = getUseBlock(Use);
    if (!contains(UserBB))
      return true;

    // When the SCoP region exit needs to be simplified, PHIs in the region
    // exit are moved to a new basic block such that their incoming blocks are
    // no longer in the SCoP.
    if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
        isExit(cast<PHINode>(Use.getUser())->getParent()))
      return true;
  }
  return false;
}

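// Gather per-SCoP counters (loop counts and a classification of the write
// accesses) for the statistics output. The counters are only computed in
// builds with assertions or LLVM_ENABLE_STATS enabled; otherwise a
// zero-initialized result is returned.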
Scop::ScopStatistics Scop::getStatistics() const {
  ScopStatistics Result;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);

  int NumTotalLoops = LoopStat.NumLoops;
  Result.NumBoxedLoops = getBoxedLoops().size();
  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;

  for (const ScopStmt &Stmt : *this) {
    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
    bool IsInLoop = Stmt.getNumIterators() >= 1;
    for (MemoryAccess *MA : Stmt) {
      if (!MA->isWrite())
        continue;

      if (MA->isLatestValueKind()) {
        Result.NumValueWrites += 1;
        if (IsInLoop)
          Result.NumValueWritesInLoops += 1;
      }

      if (MA->isLatestAnyPHIKind()) {
        Result.NumPHIWrites += 1;
        if (IsInLoop)
          Result.NumPHIWritesInLoops += 1;
      }

      isl::set AccSet =
          MA->getAccessRelation().intersect_domain(Domain).range();
      if (AccSet.is_singleton()) {
        Result.NumSingletonWrites += 1;
        if (IsInLoop)
          Result.NumSingletonWritesInLoops += 1;
      }
    }
  }
#endif
  return Result;
}

raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
  scop.print(OS, PollyPrintInstructions);
  return OS;
}

//===----------------------------------------------------------------------===//
void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.setPreservesAll();
}

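// Fold one SCoP's loop and write counters into the global pass statistics;
// SCoPs are additionally bucketed into a histogram by their maximal loop
// depth (NumScopsDepthZero ... NumScopsDepthLarger).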
void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                              Scop::ScopStatistics ScopStats) {
  assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);

  NumScops++;
  NumLoopsInScop += Stats.NumLoops;
  MaxNumLoopsInScop =
      std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);

  if (Stats.MaxDepth == 0)
    NumScopsDepthZero++;
  else if (Stats.MaxDepth == 1)
    NumScopsDepthOne++;
  else if (Stats.MaxDepth == 2)
    NumScopsDepthTwo++;
  else if (Stats.MaxDepth == 3)
    NumScopsDepthThree++;
  else if (Stats.MaxDepth == 4)
    NumScopsDepthFour++;
  else if (Stats.MaxDepth == 5)
    NumScopsDepthFive++;
  else
    NumScopsDepthLarger++;

  NumAffineLoops += ScopStats.NumAffineLoops;
  NumBoxedLoops += ScopStats.NumBoxedLoops;

  NumValueWrites += ScopStats.NumValueWrites;
  NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
  NumPHIWrites += ScopStats.NumPHIWrites;
  NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
  NumSingletonWrites += ScopStats.NumSingletonWrites;
  NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
}

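// Build the polyhedral description for a single maximal region. The pass only
// computes an analysis result and never modifies the IR, hence runOnRegion
// always returns false.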
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  S = SB.getScop(); // take ownership of scop object

#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
  }
#endif

  return false;
}

void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS, PollyPrintInstructions);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)

//===----------------------------------------------------------------------===//
ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
                   LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
                   AssumptionCache &AC, OptimizationRemarkEmitter &ORE)
    : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
  recompute();
}

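// Rebuild the Scop objects for all maximal regions accepted by ScopDetection,
// discarding any previously computed results.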
void ScopInfo::recompute() {
  RegionToScopMap.clear();
  // Create a polyhedral description of scops for all the valid regions of a
  // function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
    std::unique_ptr<Scop> S = SB.getScop();
    if (!S)
      continue;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
#endif
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}

bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
                          FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether the analysis itself or all analyses on functions have been
  // preserved, and whether any of the analyses we hold references to is being
  // invalidated.
  auto PAC = PA.getChecker<ScopInfoAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<ScopAnalysis>(F, PA) ||
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA) ||
         Inv.invalidate<AAManager>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA);
}

AnalysisKey ScopInfoAnalysis::Key;

ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  auto &DL = F.getParent()->getDataLayout();
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {DL, SD, SE, LI, AA, DT, AC, ORE};
}

PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
                                           FunctionAnalysisManager &FAM) {
  auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
  // Since the legacy PM processes Scops in bottom-up order, we print them in
  // reverse order here to keep the output consistent.
  for (auto &It : reverse(SI)) {
    if (It.second)
      It.second->print(Stream, PollyPrintInstructions);
    else
      Stream << "Invalid Scop!\n";
  }
  return PreservedAnalyses::all();
}

void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.setPreservesAll();
}

bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
  return false;
}

void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : *Result) {
    if (It.second)
      It.second->print(OS, PollyPrintInstructions);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)