// Snapshot provenance: [ScopBuilder] Report to dbgs() on SCoP bailout. NFC.
// polly-mirror.git / lib/Analysis/ScopInfo.cpp
// blob 813a569499f539c1f13b1d5c8d33f88d34d844f3
//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the Scop
// detection derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

// Statistics (printed with -stats): how often each kind of assumption had to
// be taken while building SCoPs.
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant loads assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

// Statistics about the SCoPs that remain feasible at the end of ScopInfo.
STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");

STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");

STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
STATISTIC(
    NumValueWritesInLoops,
    "Number of scalar value writes nested in affine loops after ScopInfo");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
STATISTIC(NumPHIWritesInLoops,
          "Number of scalar phi writes nested in affine loops after ScopInfo");
STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
STATISTIC(NumSingletonWritesInLoops,
          "Number of singleton writes nested in affine loops after ScopInfo");

// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code
static int const MaxDisjunctsInDomain = 20;

// The number of disjunct in the context after which we stop to add more
// disjuncts. This parameter is there to avoid exponential growth in the
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;

// Command-line knobs controlling the precision/compile-time trade-offs of the
// SCoP construction below. All live in the common Polly option category.
static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kind of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjunts allowed in memory accesses to "
             "to build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (do not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do no gist simplify sets accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferencible."
        " This is useful for invariant load hoisting, since we can generate"
        " less runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads. "),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

// Storage behind -polly-use-llvm-names; read elsewhere via
// polly::UseInstructionNames.
bool polly::UseInstructionNames;

static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyPrintInstructions(
    "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
    cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//
262 // Create a sequence of two schedules. Either argument may be null and is
263 // interpreted as the empty schedule. Can also return null if both schedules are
264 // empty.
265 static __isl_give isl_schedule *
266 combineInSequence(__isl_take isl_schedule *Prev,
267 __isl_take isl_schedule *Succ) {
268 if (!Prev)
269 return Succ;
270 if (!Succ)
271 return Prev;
273 return isl_schedule_sequence(Prev, Succ);
// Constrain dimension @p dim of @p S to the values covered by @p Range.
//
// Lower/upper bounds always come from the signed extrema of the range; for
// sign-wrapped ranges the set is additionally refined to exclude the values
// the wrapping range does not cover.
static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                    int dim, isl::dim type) {
  isl::val V;
  isl::ctx Ctx = S.get_ctx();

  // The upper and lower bound for a parameter value is derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
  S = S.lower_bound_val(type, dim, V);
  V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
  S = S.upper_bound_val(type, dim, V);

  if (Range.isFullSet())
    return S;

  // Do not refine further once the set already has many disjuncts; the
  // union below would only grow it more.
  if (isl_set_n_basic_set(S.get()) > MaxDisjunctsInContext)
    return S;

  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
  if (Range.isSignWrappedSet()) {
    V = valFromAPInt(Ctx.get(), Range.getLower(), true);
    isl::set SLB = S.lower_bound_val(type, dim, V);

    // Range.getUpper() is exclusive, hence the subtraction of one.
    V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
    V = V.sub_ui(1);
    isl::set SUB = S.upper_bound_val(type, dim, V);
    S = SLB.unite(SUB);
  }

  return S;
}
// If the base pointer of an array is itself loaded inside the SCoP, return
// the ScopArrayInfo of the array that load reads from; otherwise nullptr.
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  // Only loads executed inside the SCoP establish an origin relation.
  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  // Only plain (SCEVUnknown) base pointers can be mapped back to an array.
  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             MemoryKind::Array);
}
// Construct a ScopArrayInfo: derive an isl-compatible name (unless an explicit
// BaseName was given), record the dimension sizes, and — for real arrays —
// link to the SAI whose memory the base pointer was loaded from, if any.
ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx,
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
  std::string BasePtrName =
      BaseName ? BaseName
               : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
                                      Kind == MemoryKind::PHI ? "__phi" : "",
                                      UseInstructionNames);
  Id = isl::id::alloc(Ctx, BasePtrName, this);

  updateSizes(Sizes);

  // Base-pointer origins only make sense for genuine arrays with a base
  // pointer; scalar/PHI kinds carry none.
  if (!BasePtr || Kind != MemoryKind::Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}
// Out-of-line to anchor the vtable-free class; no resources to release.
ScopArrayInfo::~ScopArrayInfo() = default;
359 isl::space ScopArrayInfo::getSpace() const {
360 auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions());
361 Space = Space.set_tuple_id(isl::dim::set, Id);
362 return Space;
365 bool ScopArrayInfo::isReadOnly() {
366 isl::union_set WriteSet = S.getWrites().range();
367 isl::space Space = getSpace();
368 WriteSet = WriteSet.extract_set(Space);
370 return bool(WriteSet.is_empty());
373 bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const {
374 if (Array->getElementType() != getElementType())
375 return false;
377 if (Array->getNumberOfDimensions() != getNumberOfDimensions())
378 return false;
380 for (unsigned i = 0; i < getNumberOfDimensions(); i++)
381 if (Array->getDimensionSize(i) != getDimensionSize(i))
382 return false;
384 return true;
// Reconcile the canonical element type of this array with NewElementType,
// falling back to an integer type of the GCD bit-width when the sizes differ.
void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  // NOTE(review): for the sizes that reach here (non-zero, unequal),
  // `NewElementSize % OldElementSize == 0` implies New >= Old, so combined
  // with `NewElementSize < OldElementSize` this condition looks
  // unsatisfiable and the GCD fallback always runs. The GCD still yields a
  // valid common element size, but confirm whether the intended test was
  // `OldElementSize % NewElementSize == 0`.
  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}
/// Make the ScopArrayInfo model a Fortran Array
///
/// The outermost dimension, whose size is unknown from SCEV, is replaced by a
/// fresh isl parameter that stands for the size stored in the Fortran array
/// descriptor @p FAD.
void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
  assert(FAD && "got invalid Fortran array descriptor");
  if (this->FAD) {
    assert(this->FAD == FAD &&
           "receiving different array descriptors for same array");
    return;
  }

  // The outermost dimension must currently be size-less.
  assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
  assert(!this->FAD);
  this->FAD = FAD;

  isl::space Space(S.getIslCtx(), 1, 0);

  std::string param_name = getName();
  param_name += "_fortranarr_size";
  isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this);

  Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff);
  // The dimension size is simply the value of the new parameter.
  isl::pw_aff PwAff =
      isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0);

  DimensionSizesPw[0] = PwAff;
}
// Update the dimension sizes of this array with @p NewSizes.
//
// Sizes are aligned at the innermost dimension (the two size lists may have
// different lengths). With @p CheckConsistency, returns false if a known size
// contradicts a new one, and keeps the old sizes when they already cover at
// least as many dimensions. Otherwise the new sizes replace the old ones and
// the piecewise-affine forms are recomputed.
bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
                                bool CheckConsistency) {
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
  int ExtraDimsNew = NewSizes.size() - SharedDims;
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;

  if (CheckConsistency) {
    // Compare the sizes of the dimensions both lists describe, innermost
    // aligned; a mismatch of two known sizes is a hard failure.
    for (int i = 0; i < SharedDims; i++) {
      auto *NewSize = NewSizes[i + ExtraDimsNew];
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
      if (NewSize && KnownSize && NewSize != KnownSize)
        return false;
    }

    // Nothing to do if we already model at least as many dimensions.
    if (DimensionSizes.size() >= NewSizes.size())
      return true;
  }

  DimensionSizes.clear();
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
                        NewSizes.end());
  DimensionSizesPw.clear();
  for (const SCEV *Expr : DimensionSizes) {
    if (!Expr) {
      // Unknown size (e.g. outermost dimension).
      DimensionSizesPw.push_back(nullptr);
      continue;
    }
    isl::pw_aff Size = S.getPwAffOnly(Expr);
    DimensionSizesPw.push_back(Size);
  }
  return true;
}
// The array's name is the name stored in its isl id.
std::string ScopArrayInfo::getName() const { return Id.get_name(); }
// Allocation size of one canonical element, in bytes.
int ScopArrayInfo::getElemSizeInBytes() const {
  return DL.getTypeAllocSize(ElementType);
}
// The isl id that identifies this array (also carries `this` as user data).
isl::id ScopArrayInfo::getBasePtrId() const { return Id; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print this array to stderr.
LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); }
#endif
// Print this array in the form "<type> <name>[size]...[size]; // Element size N".
// With @p SizeAsPwAff the sizes are printed as isl piecewise-affine
// expressions instead of SCEVs.
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // If this is a Fortran array, then we can print the outermost dimension
  // as a isl_pw_aff even though there is no SCEV information.
  bool IsOutermostSizeKnown = SizeAsPwAff && FAD;

  if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
      !getDimensionSize(0)) {
    // Unknown outermost size is rendered as "[*]".
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      isl::pw_aff Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}
// Recover the ScopArrayInfo from the output-tuple id of an access function.
const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
  isl::id Id = PMA.get_tuple_id(isl::dim::out);
  assert(!Id.is_null() && "Output dimension didn't have an ID");
  return getFromId(Id);
}
516 const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
517 void *User = Id.get_user();
518 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
519 return SAI;
// Fold each index expression into the bounds of its dimension: replace index i
// by (i mod size) and carry floor(i / size) into the next-outer dimension, for
// every inner dimension whose size is a non-zero compile-time constant.
void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::ctx Ctx = ArraySpace.get_ctx();
  unsigned DimsArray = SAI->getNumberOfDimensions();

  // Identity transformation on the array space; refined dimension by
  // dimension below.
  isl::multi_aff DivModAff = isl::multi_aff::identity(
      ArraySpace.map_from_domain_and_range(ArraySpace));
  isl::local_space LArraySpace = isl::local_space(ArraySpace);

  // Begin with last dimension, to iteratively carry into higher dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    // This transformation is not applicable to dimensions of size zero.
    if (DimSize->isZero())
      continue;

    isl::val DimSizeVal =
        valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
    isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
    isl::aff PrevVar =
        isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    isl::aff Modulo = Var.mod(DimSizeVal);
    Modulo = Modulo.pullback(DivModAff);

    // Compute: floor(index / size)
    isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
    Divide = Divide.floor();
    Divide = Divide.add(PrevVar);
    Divide = Divide.pullback(DivModAff);

    // Apply Modulo and Divide.
    DivModAff = DivModAff.set_aff(i, Modulo);
    DivModAff = DivModAff.set_aff(i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  isl::map Relation = AccessRelation;
  Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
  Relation = Relation.detect_equalities();
  AccessRelation = Relation;
}
// Align the dimensionality of this access with its array: pad missing outer
// dimensions with zero, normalize byte offsets to element indices, fold
// constant-size dimensions, and widen the accessed range when the loaded type
// is larger than the array's canonical element type.
void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::space AccessSpace = AccessRelation.get_space().range();
  isl::ctx Ctx = ArraySpace.get_ctx();

  auto DimsArray = ArraySpace.dim(isl::dim::set);
  auto DimsAccess = AccessSpace.dim(isl::dim::set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  // Map the (possibly lower-dimensional) access space into the array space:
  // missing outer dimensions become 0, the rest are equated.
  isl::map Map = isl::map::from_domain_and_range(
      isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = Map.fix_si(isl::dim::out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);

  AccessRelation = AccessRelation.apply_range(Map);

  // For the non delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not divisible
  // by the accesses element size, we will have chosen a smaller ArrayElemSize
  // that divides the offsets of all accesses to this base pointer.
  if (DimsAccess == 1) {
    isl::val V = isl::val(Ctx, ArrayElemSize);
    AccessRelation = AccessRelation.floordiv_val(V);
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimension's indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; Effect on unit tests is to make index
  // expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    isl::map Map = isl::map::from_domain_and_range(
        isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

    isl::constraint C;
    isl::local_space LS;

    LS = isl::local_space(Map.get_space());
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    // Constrain the innermost output dimension o to  in <= o <= in + Num - 1.
    C = isl::constraint::alloc_inequality(LS);
    C = C.set_constant_val(isl::val(Ctx, Num - 1));
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
    Map = Map.add_constraint(C);

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
    C = C.set_constant_val(isl::val(Ctx, 0));
    Map = Map.add_constraint(C);
    AccessRelation = AccessRelation.apply_range(Map);
  }
}
// Return the printable operator symbol for a reduction type; RT_NONE is a
// programming error.
const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
}
/// Return the reduction type for a given binary operator.
///
/// Floating-point add/mul only qualify when the operation has unsafe algebra
/// (reassociation allowed); multiplicative reductions can be disabled via
/// -polly-disable-multiplicative-reductions.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
711 const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
712 isl::id ArrayId = getArrayId();
713 void *User = ArrayId.get_user();
714 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
715 return SAI;
718 const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
719 isl::id ArrayId = getLatestArrayId();
720 void *User = ArrayId.get_user();
721 const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
722 return SAI;
// Array id of the access relation as originally derived from the IR.
isl::id MemoryAccess::getOriginalArrayId() const {
  return AccessRelation.get_tuple_id(isl::dim::out);
}
729 isl::id MemoryAccess::getLatestArrayId() const {
730 if (!hasNewAccessRelation())
731 return getOriginalArrayId();
732 return NewAccessRelation.get_tuple_id(isl::dim::out);
// Single-valued address function: lexicographic minimum of the access
// relation.
isl::map MemoryAccess::getAddressFunction() const {
  return getAccessRelation().lexmin();
}
739 isl::pw_multi_aff
740 MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
741 isl::map Schedule, ScheduledAccRel;
742 isl::union_set UDomain;
744 UDomain = getStatement()->getDomain();
745 USchedule = USchedule.intersect_domain(UDomain);
746 Schedule = isl::map::from_union_map(USchedule);
747 ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
748 return isl::pw_multi_aff::from_map(ScheduledAccRel);
// Access relation as originally derived from the IR.
isl::map MemoryAccess::getOriginalAccessRelation() const {
  return AccessRelation;
}
// String form of the original access relation (for printing/debugging).
std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return stringFromIslObj(AccessRelation.get());
}
// Space of the original access relation.
isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
  return AccessRelation.get_space();
}
// Access relation after any transformation (may be null if none was set).
isl::map MemoryAccess::getNewAccessRelation() const {
  return NewAccessRelation;
}
// String form of the new access relation (for printing/debugging).
std::string MemoryAccess::getNewAccessRelationStr() const {
  return stringFromIslObj(NewAccessRelation.get());
}
// String form of the currently effective access relation.
std::string MemoryAccess::getAccessRelationStr() const {
  return getAccessRelation().to_str();
}
// Build a universe map from the statement's domain to a one-dimensional
// (scalar) range, with parameters aligned to the domain space.
isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
  Space = Space.align_params(Statement->getDomainSpace());

  return isl::basic_map::from_domain_and_range(
      isl::basic_set::universe(Statement->getDomainSpace()),
      isl::basic_set::universe(Space));
}
// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out of bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is larger than zero and strictly smaller than the
// size of the corresponding dimension). The only exception is the outermost
// dimension for which we do not need to assume any upper bound. At this point
// we formalize this assumption to ensure that at code generation time the
// relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out of bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out of bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure such
// statement instances are never executed.
void MemoryAccess::assumeNoOutOfBound() {
  if (PollyIgnoreInbounds)
    return;
  auto *SAI = getScopArrayInfo();
  isl::space Space = getOriginalAccessRelationSpace().range();
  isl::set Outside = isl::set::empty(Space);
  // Dimension 0 is intentionally skipped: the outermost dimension needs no
  // upper bound.
  for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
    isl::local_space LS(Space);
    isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
    isl::pw_aff Zero = isl::pw_aff(LS);

    // Out of bounds below (index < 0) ...
    isl::set DimOutside = Var.lt_set(Zero);
    isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
    SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
    SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
    // ... or above (size <= index).
    DimOutside = DimOutside.unite(SizeE.le_set(Var));

    Outside = Outside.unite(DimOutside);
  }

  Outside = Outside.apply(getAccessRelation().reverse());
  Outside = Outside.intersect(Statement->getDomain());
  Outside = Outside.params();

  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always save, but may make the resulting run-time check
  // bail out more often than strictly necessary.
  Outside = Outside.remove_divs();
  Outside = Outside.complement();
  const auto &Loc = getAccessInstruction()
                        ? getAccessInstruction()->getDebugLoc()
                        : DebugLoc();
  if (!PollyPreciseInbounds)
    Outside = Outside.gist_params(Statement->getDomain().params());
  Statement->getParent()->recordAssumption(INBOUNDS, Outside.release(), Loc,
                                           AS_ASSUMPTION);
}
// Build the access relation for a memset/memcpy/memmove access from its start
// offset (Subscripts[0]) and length (Subscripts[1]); an unknown length
// (nullptr) yields an unbounded access starting at the offset.
void MemoryAccess::buildMemIntrinsicAccessRelation() {
  assert(isMemoryIntrinsic());
  assert(Subscripts.size() == 2 && Sizes.size() == 1);

  isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
  isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);

  isl::map LengthMap;
  if (Subscripts[1] == nullptr) {
    // Unknown length: access everything from the start offset onwards.
    LengthMap = isl::map::universe(SubscriptMap.get_space());
  } else {
    isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
    LengthMap = isl::map::from_pw_aff(LengthPWA);
    isl::space RangeSpace = LengthMap.get_space().range();
    // Expand the single length value to all offsets strictly below it.
    LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
  }
  LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
  LengthMap = LengthMap.align_params(SubscriptMap.get_space());
  SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
  // Shift the accessed interval by the start offset.
  LengthMap = LengthMap.sum(SubscriptMap);
  AccessRelation =
      LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
}
868 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
869 ScalarEvolution *SE = Statement->getParent()->getSE();
871 auto MAI = MemAccInst(getAccessInstruction());
872 if (isa<MemIntrinsic>(MAI))
873 return;
875 Value *Ptr = MAI.getPointerOperand();
876 if (!Ptr || !SE->isSCEVable(Ptr->getType()))
877 return;
879 auto *PtrSCEV = SE->getSCEV(Ptr);
880 if (isa<SCEVCouldNotCompute>(PtrSCEV))
881 return;
883 auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
884 if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
885 PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);
887 const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
888 if (Range.isFullSet())
889 return;
891 if (Range.isWrappedSet() || Range.isSignWrappedSet())
892 return;
894 bool isWrapping = Range.isSignWrappedSet();
896 unsigned BW = Range.getBitWidth();
897 const auto One = APInt(BW, 1);
898 const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
899 const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();
901 auto Min = LB.sdiv(APInt(BW, ElementSize));
902 auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
904 assert(Min.sle(Max) && "Minimum expected to be less or equal than max");
906 isl::map Relation = AccessRelation;
907 isl::set AccessRange = Relation.range();
908 AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
909 isl::dim::set);
910 AccessRelation = Relation.intersect_range(AccessRange);
// Fold the access relation so that a subscript that runs below zero borrows
// from the next-outer dimension:
//   (d_i, d_{i+1}) -> (d_i - 1, d_{i+1} + Size_{i+1})  if d_{i+1} < 0
// This is only done when the dimension sizes are non-constant (parametric),
// i.e. presumably for delinearized accesses -- TODO confirm with callers.
void MemoryAccess::foldAccessRelation() {
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    // The parameter carrying the (parametric) size of dimension i+1.
    isl::id ParamId =
        give(isl_space_get_dim_id(SpaceSize.get(), isl_dim_param, 0));

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    // MapOne: identity for iterations where subscript i+1 is non-negative.
    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    // MapTwo: for negative subscript i+1, keep all other dimensions equal ...
    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    // ... and constrain out_i = in_i - 1 (in_i - out_i - 1 == 0) ...
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    // ... and out_{i+1} = in_{i+1} + Size (in_{i+1} - out_{i+1} + p == 0).
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the generated
  // run-time checks and can lead to costly compilation.
  if (!PollyPreciseFoldAccesses &&
      isl_map_n_basic_map(NewAccessRelation.get()) >
          isl_map_n_basic_map(AccessRelation.get())) {
    // Keep the original relation; the folded one got more complex.
  } else {
    AccessRelation = NewAccessRelation;
  }
}
982 /// Check if @p Expr is divisible by @p Size.
983 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
984 assert(Size != 0);
985 if (Size == 1)
986 return true;
988 // Only one factor needs to be divisible.
989 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
990 for (auto *FactorExpr : MulExpr->operands())
991 if (isDivisible(FactorExpr, Size, SE))
992 return true;
993 return false;
996 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
997 // to be divisible.
998 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
999 for (auto *OpExpr : NAryExpr->operands())
1000 if (!isDivisible(OpExpr, Size, SE))
1001 return false;
1002 return true;
1005 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
1006 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
1007 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
1008 return MulSCEV == Expr;
// Translate the subscript expressions of this access into an isl map from
// the statement's iteration domain to the array described by @p SAI.
// Memory intrinsics and non-affine accesses are handled specially; the
// latter are overapproximated as accesses to the whole array.
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which the
  // access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference, if an
    // access must or may happen. However, for write accesses it is important to
    // differentiate between writes that must happen and writes that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  // Append one output dimension per subscript expression.
  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}
// Construct a MemoryAccess from an explicit access instruction together with
// its subscript and size expressions. The access relation itself is built
// later via buildAccessRelation().
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr), FAD(nullptr) {
  // Name the access after its type and the current number of accesses in the
  // statement, prefixed by the statement's base name.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
// Construct a MemoryAccess directly from a new access relation @p AccRel.
// Element type, base pointer and dimension sizes are recovered from the
// ScopArrayInfo attached to the relation's output tuple id.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  // The outermost dimension carries no size expression.
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  // Same naming scheme as the instruction-based constructor.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}
// Defaulted out-of-line destructor.
MemoryAccess::~MemoryAccess() = default;
// Simplify the invalid domain and the access relation with respect to the
// parameter constraints of the SCoP's context.
void MemoryAccess::realignParams() {
  isl::set Ctx = Statement->getParent()->getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  AccessRelation = AccessRelation.gist_params(Ctx);
}
// Return the textual form of this access's reduction operator.
const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}
// Return the isl id identifying this memory access.
isl::id MemoryAccess::getId() const { return Id; }
1108 raw_ostream &polly::operator<<(raw_ostream &OS,
1109 MemoryAccess::ReductionType RT) {
1110 if (RT == MemoryAccess::RT_NONE)
1111 OS << "NONE";
1112 else
1113 OS << MemoryAccess::getReductionOperatorStr(RT);
1114 return OS;
// Remember the Fortran array descriptor value backing this access.
void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }
// Print a human-readable description of this access: access kind, reduction
// type, optional Fortran descriptor, scalar flag, and the original (and, if
// present, new) access relation.
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print this access to stderr.
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif
// Translate the SCEV @p E into a piecewise affine function relative to this
// access's statement. Iterations for which the translation is not valid are
// accumulated into InvalidDomain.
isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  // Drop the tuple id so the domain can be intersected with the (unnamed)
  // invalid set returned alongside the pw_aff.
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(isl::manage(PWAC.second));
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return isl::manage(PWAC.first);
}
// Create a map in the size of the provided set domain, that maps from the
// one element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strict smaller
// than the last dimension of the output.
//
//   getEqualAndLarger(set[i0, i1, ..., iX]):
//
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strict smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out, lastDimension);
  return Map;
}
// Compute the set of strides this access exhibits under @p Schedule: relate
// each schedule point to its immediate lexicographic successor, pull both
// sides through the schedule and the access relation, and take the deltas of
// the resulting pairs of accessed elements.
isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  // Only keep the immediately following schedule point.
  NextScatt = NextScatt.lexmin();

  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}
// Check whether every stride of this access under @p Schedule is exactly
// @p StrideWidth in the innermost dimension and zero in all other dimensions.
bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
  isl::set Stride, StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX = isl::set::universe(Stride.get_space());
  // Fix all but the last dimension to zero ...
  for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
    StrideX = StrideX.fix_si(isl::dim::set, i, 0);
  // ... and the last dimension to the requested stride.
  StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
                           StrideWidth);
  IsStrideX = Stride.is_subset(StrideX);

  return IsStrideX;
}
// Stride zero: consecutive schedule points access the same element.
bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}
// Stride one: consecutive schedule points access adjacent elements.
bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}
// Overwrite the original access relation.
void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}
// Install @p NewAccess as the access relation to use from now on. In asserts
// builds several consistency conditions are verified: matching domain space,
// totality for reads, a known accessed array, hoisted base pointers for
// indirect arrays, and matching dimensionality.
void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain =
        StmtDomain.intersect_params(getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to number of dimensions of the
  // accesses array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  // Simplify the relation with respect to the statement's domain before
  // storing it.
  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}
// Return true if the latest access relation does not cover the entire
// statement domain, i.e. the access is only executed on a subdomain.
bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  // Compare against isl_bool_false explicitly: an isl error would yield
  // isl_bool_error, which reports "not partial" here.
  return isl_set_is_subset(StmtDom.keep(), AccDom.keep()) == isl_bool_false;
}
1291 //===----------------------------------------------------------------------===//
1293 isl::map ScopStmt::getSchedule() const {
1294 isl::set Domain = getDomain();
1295 if (Domain.is_empty())
1296 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1297 auto Schedule = getParent()->getSchedule();
1298 if (!Schedule)
1299 return nullptr;
1300 Schedule = Schedule.intersect_domain(isl::union_set(Domain));
1301 if (Schedule.is_empty())
1302 return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
1303 isl::map M = M.from_union_map(Schedule);
1304 M = M.coalesce();
1305 M = M.gist_domain(Domain);
1306 M = M.coalesce();
1307 return M;
// Replace the statement's domain with @p NewDomain, which must be a subset
// of the current domain.
void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}
// Build the polyhedral access relation for every memory access of this
// statement and register each access (and its array) with the SCoP.
void ScopStmt::buildAccessRelations() {
  Scop &S = *getParent();
  for (MemoryAccess *Access : MemAccs) {
    Type *ElementType = Access->getElementType();

    // Select the array kind matching the access kind.
    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                           ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    S.addAccessData(Access);
  }
}
// Add @p Access to this statement and index it in the matching lookup table:
// the per-instruction list for array accesses, or the value/PHI read/write
// maps otherwise. With @p Prepend set, the access goes to the front of the
// access list.
void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    // At most one write per value is expected.
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}
// Simplify all memory accesses, the invalid domain, and the domain of this
// statement with respect to the SCoP's context.
void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  isl::set Ctx = Parent.getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  Domain = Domain.gist_params(Ctx);
}
/// Add @p BSet to the set @p User if @p BSet is bounded.
///
/// Callback for isl_set_foreach_basic_set; @p User points to the isl_set
/// accumulating the bounded basic sets.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}
/// Return the bounded parts of @p S.
///
/// Iterates over the basic sets of @p S and keeps only the bounded ones;
/// takes ownership of @p S.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
  // Force a lower bound of zero on every dimension before checking
  // boundedness.
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  for (unsigned u = 0; u < Dim; u++) {
    // p_u - d_u >= 0, i.e. dimension u is bounded by artificial parameter u.
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}
/// Set the dimension Ids from @p From in @p To.
///
/// Copies each set-dimension id so later set operations on @p To line up
/// with sets derived from @p From.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}
1453 /// Create the conditions under which @p L @p Pred @p R is true.
1454 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1455 __isl_take isl_pw_aff *L,
1456 __isl_take isl_pw_aff *R) {
1457 switch (Pred) {
1458 case ICmpInst::ICMP_EQ:
1459 return isl_pw_aff_eq_set(L, R);
1460 case ICmpInst::ICMP_NE:
1461 return isl_pw_aff_ne_set(L, R);
1462 case ICmpInst::ICMP_SLT:
1463 return isl_pw_aff_lt_set(L, R);
1464 case ICmpInst::ICMP_SLE:
1465 return isl_pw_aff_le_set(L, R);
1466 case ICmpInst::ICMP_SGT:
1467 return isl_pw_aff_gt_set(L, R);
1468 case ICmpInst::ICMP_SGE:
1469 return isl_pw_aff_ge_set(L, R);
1470 case ICmpInst::ICMP_ULT:
1471 return isl_pw_aff_lt_set(L, R);
1472 case ICmpInst::ICMP_UGT:
1473 return isl_pw_aff_gt_set(L, R);
1474 case ICmpInst::ICMP_ULE:
1475 return isl_pw_aff_le_set(L, R);
1476 case ICmpInst::ICMP_UGE:
1477 return isl_pw_aff_ge_set(L, R);
1478 default:
1479 llvm_unreachable("Non integer predicate not supported");
/// Create the conditions under which @p L @p Pred @p R is true.
///
/// Helper function that will make sure the dimensions of the result have the
/// same isl_id's as the @p Domain.
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
                                             __isl_take isl_pw_aff *L,
                                             __isl_take isl_pw_aff *R,
                                             __isl_keep isl_set *Domain) {
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
  // Copy the dimension ids so later intersections with Domain line up.
  return setDimensionIds(Domain, ConsequenceCondSet);
}
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  // Record the iterations on which the translation is invalid for this BB.
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(isl::manage(PWAC.second));
  return PWAC.first;
}
/// Build the conditions sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  // Each case contributes the part of the domain on which the switch value
  // equals its case value; successor index 0 is the default destination.
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  // The default destination is taken on the remainder of the domain.
  // NOTE(review): this reads ConditionSets[1], assuming the switch has at
  // least one non-default successor -- verify callers guarantee this.
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = setDimensionIds(
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));

  isl_pw_aff_free(LHS);

  return true;
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
static __isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  // Both conditions together model the unsigned comparison.
  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  ConsequenceCondSet = setDimensionIds(Domain, ConsequenceCondSet);
  return ConsequenceCondSet;
}
/// Build the conditions sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                   TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;
  if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    // Constant conditions are either never or always taken.
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    // Recurse into both operands; each recursion pushes a (consequence,
    // alternative) pair onto ConditionSets.
    bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    // Pop both pairs, keeping only the consequence parts, and combine them
    // according to the opcode.
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    ScalarEvolution &SE = *S.getSE();
    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      // a >u b is modeled as b <u a (operands swapped).
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet =
          buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  // Bail out if either branch's condition set became too disjunctive;
  // large domains make later polyhedral operations too expensive.
  if (TooComplex) {
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                 TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors.
static bool
buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
                   __isl_keep isl_set *Domain,
                   DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  // Switches are handled by the dedicated overload.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  // An unconditional branch transfers control on the whole domain.
  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
1745 void ScopStmt::buildDomain() {
1746 isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
1748 Domain = getParent()->getDomainConditions(this);
1749 Domain = Domain.set_tuple_id(Id);
1752 void ScopStmt::collectSurroundingLoops() {
1753 for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1754 isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1755 NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
// Constructor for a region statement (a non-affine subregion modeled as a
// single statement).
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), SurroundingLoop(SurroundingLoop) {
  // Name the statement after the region; getNextStmtIdx() keeps names unique.
  BaseName = getIslCompatibleName(
      "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames);
}
1766 ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop,
1767 std::vector<Instruction *> Instructions, int Count)
1768 : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1769 Build(nullptr), SurroundingLoop(SurroundingLoop),
1770 Instructions(Instructions) {
1771 std::string S = "";
1772 if (Count != 0)
1773 S += std::to_string(Count);
1774 BaseName = getIslCompatibleName("Stmt", &bb, parent.getNextStmtIdx(), S,
1775 UseInstructionNames);
// Constructor for a copy statement: a statement that is not backed by IR but
// copies data from a source access relation to a target access relation.
ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  // Tie the domain and both access relations to this statement's id.
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  // The write access; ownership of the raw MemoryAccess is handed to the
  // parent Scop via addAccessFunction.
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  // The matching read access from the copy source.
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}

ScopStmt::~ScopStmt() = default;
// One-time initialization: compute the domain, the surrounding loop nest and
// the access relations, then (optionally) detect reduction-like accesses.
void ScopStmt::init(LoopInfo &LI) {
  assert(!Domain && "init must be called only once");

  buildDomain();
  collectSurroundingLoops();
  buildAccessRelations();

  if (DetectReductions)
    checkForReductions();
}
1810 /// Collect loads which might form a reduction chain with @p StoreMA.
1812 /// Check if the stored value for @p StoreMA is a binary operator with one or
1813 /// two loads as operands. If the binary operand is commutative & associative,
1814 /// used only once (by @p StoreMA) and its load operands are also used only
1815 /// once, we have found a possible reduction chain. It starts at an operand
1816 /// load and includes the binary operator and @p StoreMA.
1818 /// Note: We allow only one use to ensure the load and binary operator cannot
1819 /// escape this block or into any other store except @p StoreMA.
1820 void ScopStmt::collectCandiateReductionLoads(
1821 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1822 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1823 if (!Store)
1824 return;
1826 // Skip if there is not one binary operator between the load and the store
1827 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1828 if (!BinOp)
1829 return;
1831 // Skip if the binary operators has multiple uses
1832 if (BinOp->getNumUses() != 1)
1833 return;
1835 // Skip if the opcode of the binary operator is not commutative/associative
1836 if (!BinOp->isCommutative() || !BinOp->isAssociative())
1837 return;
1839 // Skip if the binary operator is outside the current SCoP
1840 if (BinOp->getParent() != Store->getParent())
1841 return;
1843 // Skip if it is a multiplicative reduction and we disabled them
1844 if (DisableMultiplicativeReductions &&
1845 (BinOp->getOpcode() == Instruction::Mul ||
1846 BinOp->getOpcode() == Instruction::FMul))
1847 return;
1849 // Check the binary operator operands for a candidate load
1850 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1851 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1852 if (!PossibleLoad0 && !PossibleLoad1)
1853 return;
1855 // A load is only a candidate if it cannot escape (thus has only this use)
1856 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1857 if (PossibleLoad0->getParent() == Store->getParent())
1858 Loads.push_back(&getArrayAccessFor(PossibleLoad0));
1859 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1860 if (PossibleLoad1->getParent() == Store->getParent())
1861 Loads.push_back(&getArrayAccessFor(PossibleLoad1));
/// Check for reductions in this ScopStmt.
///
/// Iterate over all store memory accesses and check for valid binary reduction
/// like chains. For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reductions candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
void ScopStmt::checkForReductions() {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : MemAccs) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check if the remaining for overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(getDomain());
    isl::set AllAccs = AllAccsRel.range();

    // Any other access touching the same locations invalidates the candidate.
    for (MemoryAccess *MA : MemAccs) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel = MA->getAccessRelation().intersect_domain(getDomain());
      isl::set Accs = AccRel.range();

      // Only accesses in the same space can overlap at all.
      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
// Textual (isl) representation of the statement's iteration domain.
std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }

// Textual (isl) representation of the statement's schedule; empty string if
// no schedule is available.
std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule().release();
  if (!S)
    return {};
  auto Str = stringFromIslObj(S);
  // We took ownership via release(), so free the raw isl_map here.
  isl_map_free(S);
  return Str;
}
void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }

// For region statements the region's entry block stands in for the statement;
// block statements simply use their basic block.
BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

// Loop of the @p Dimension'th domain dimension (filled by
// collectSurroundingLoops).
Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

isl::set ScopStmt::getDomain() const { return Domain; }

isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }

isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }
// Print the IR instructions belonging to this (block) statement, indented to
// match the surrounding ScopStmt::print output.
void ScopStmt::printInstructions(raw_ostream &OS) const {
  OS << "Instructions {\n";

  for (Instruction *Inst : Instructions)
    OS.indent(16) << *Inst << "\n";

  OS.indent(12) << "}\n";
}
// Print the statement: name, domain, schedule, memory accesses and,
// optionally, the covered instructions.
void ScopStmt::print(raw_ostream &OS, bool PrintInstructions) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  // The schedule is only meaningful once a domain has been computed.
  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);

  // Only block statements carry an explicit instruction list.
  if (PrintInstructions && isBlockStmt())
    printInstructions(OS.indent(12));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debugger aid: print the statement (including instructions) to dbgs().
LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); }
#endif
// Remove @p MA from the per-kind lookup tables (value reads/writes, PHI
// reads/writes). The access itself stays in MemAccs; callers handle that.
void ScopStmt::removeAccessData(MemoryAccess *MA) {
  if (MA->isRead() && MA->isOriginalValueKind()) {
    bool Found = ValueReads.erase(MA->getAccessValue());
    (void)Found; // Silence unused-variable warning in NDEBUG builds.
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalValueKind()) {
    bool Found = ValueWrites.erase(cast<Instruction>(MA->getAccessValue()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isWrite() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIWrites.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
  if (MA->isRead() && MA->isOriginalAnyPHIKind()) {
    bool Found = PHIReads.erase(cast<PHINode>(MA->getAccessInstruction()));
    (void)Found;
    assert(Found && "Expected access data not found");
  }
}
2028 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
2029 // Remove the memory accesses from this statement together with all scalar
2030 // accesses that were caused by it. MemoryKind::Value READs have no access
2031 // instruction, hence would not be removed by this function. However, it is
2032 // only used for invariant LoadInst accesses, its arguments are always affine,
2033 // hence synthesizable, and therefore there are no MemoryKind::Value READ
2034 // accesses to be removed.
2035 auto Predicate = [&](MemoryAccess *Acc) {
2036 return Acc->getAccessInstruction() == MA->getAccessInstruction();
2038 for (auto *MA : MemAccs) {
2039 if (Predicate(MA)) {
2040 removeAccessData(MA);
2041 Parent.removeAccessData(MA);
2044 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
2045 MemAccs.end());
2046 InstructionToAccess.erase(MA->getAccessInstruction());
// Remove exactly one memory access (in contrast to removeMemoryAccess, which
// removes every access sharing the same access instruction).
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
  assert(MAIt != MemAccs.end());
  MemAccs.erase(MAIt);

  removeAccessData(MA);
  Parent.removeAccessData(MA);

  // Drop the instruction->access mapping entry; erase the whole entry once
  // the last access for that instruction is gone.
  auto It = InstructionToAccess.find(MA->getAccessInstruction());
  if (It != InstructionToAccess.end()) {
    It->second.remove(MA);
    if (It->second.empty())
      InstructionToAccess.erase(MA->getAccessInstruction());
  }
}
// Return the existing input access for @p V, or create a new
// MemoryKind::Value READ access for it in this statement.
MemoryAccess *ScopStmt::ensureValueRead(Value *V) {
  MemoryAccess *Access = lookupInputAccessOf(V);
  if (Access)
    return Access;

  // Create (or look up) the scalar array modeling V and build a read access
  // to it; ownership of the raw MemoryAccess goes to the parent Scop.
  ScopArrayInfo *SAI =
      Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value);
  Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(),
                            true, {}, {}, V, MemoryKind::Value);
  Parent.addAccessFunction(Access);
  Access->buildAccessRelation(SAI);
  addAccess(Access);
  Parent.addAccessData(Access);
  return Access;
}
// Stream operator; instruction printing is controlled by the
// -polly-print-instructions option.
raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) {
  S.print(OS, PollyPrintInstructions);
  return OS;
}
//===----------------------------------------------------------------------===//
/// Scop class implement

// Replace the SCoP context; the new context is first aligned to the old
// context's parameter space so downstream isl operations keep working.
void Scop::setContext(__isl_take isl_set *NewContext) {
  NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
  isl_set_free(Context);
  Context = NewContext;
}
namespace {

/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  const ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
                                 ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  /// Convenience entry point: rewrite @p E using the value map @p VMap.
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             const ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  // Rewrite {Start,+,Step} as Start + {0,+,Step} so that a (possibly
  // remapped) start expression need not dominate the AddRec's loop.
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  // Replace values that have a representative in the map; others stay as-is.
  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

/// Check whether we should remap a SCEV expression.
struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
  const ValueToValueMap &VMap;
  bool FoundInside = false;
  const Scop *S;

public:
  SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
                     const Scop *S)
      : SCEVTraversal(*this), VMap(VMap), S(S) {}

  /// Return true iff @p E contains a sub-expression defined inside @p S that
  /// is not remapped by @p VMap.
  static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
                         const ValueToValueMap &VMap, const Scop *S) {
    SCEVFindInsideScop SFIS(VMap, SE, S);
    SFIS.visitAll(E);
    return SFIS.FoundInside;
  }

  bool follow(const SCEV *E) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
      FoundInside |= S->getRegion().contains(AddRec->getLoop());
    } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
      if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
        FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
    }
    // Stop descending as soon as a variant sub-expression was found.
    return !FoundInside;
  }

  bool isDone() { return FoundInside; }
};

} // end anonymous namespace
// Normalize @p E so that equivalent invariant loads are represented by a
// single class representative (see buildInvariantEquivalenceClasses).
const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
  // Check whether it makes sense to rewrite the SCEV. (ScalarEvolution
  // doesn't like addition between an AddRec and an expression that
  // doesn't have a dominance relationship with it.)
  if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
    return E;

  // Rewrite SCEV.
  return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}
2172 // This table of function names is used to translate parameter names in more
2173 // human-readable names. This makes it easier to interpret Polly analysis
2174 // results.
2175 StringMap<std::string> KnownNames = {
2176 {"_Z13get_global_idj", "global_id"},
2177 {"_Z12get_local_idj", "local_id"},
2178 {"_Z15get_global_sizej", "global_size"},
2179 {"_Z14get_local_sizej", "local_size"},
2180 {"_Z12get_work_dimv", "work_dim"},
2181 {"_Z17get_global_offsetj", "global_offset"},
2182 {"_Z12get_group_idj", "group_id"},
2183 {"_Z14get_num_groupsj", "num_groups"},
// Build a human-readable parameter name for a constant call: the (possibly
// demangled via KnownNames) callee name followed by its constant arguments.
static std::string getCallParamName(CallInst *Call) {
  std::string Result;
  raw_string_ostream OS(Result);
  std::string Name = Call->getCalledFunction()->getName();

  // Translate known OpenCL-style mangled names into readable ones.
  auto Iterator = KnownNames.find(Name);
  if (Iterator != KnownNames.end())
    Name = "__" + Iterator->getValue();
  OS << Name;
  // Callers ensure all arguments are constants (isConstCall).
  for (auto &Operand : Call->arg_operands()) {
    ConstantInt *Op = cast<ConstantInt>(&Operand);
    OS << "_" << Op->getValue();
  }
  OS.flush();
  return Result;
}
// Create and record an isl::id for @p Parameter, choosing the most
// informative name available (call signature, value name, load origin, or a
// numeric fallback "p_<idx>").
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  // Default: numbered parameter name.
  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();
    CallInst *Call = dyn_cast<CallInst>(Val);

    if (Call && isConstCall(Call)) {
      ParameterName = getCallParamName(Call);
    } else if (UseInstructionNames) {
      // If this parameter references a specific Value and this value has a name
      // we use this name as it is likely to be unique and more useful than just
      // a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        // Unnamed load: name the parameter after the (stripped) pointer it
        // was loaded from, if that pointer has a name.
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }
  }

  // Sanitize the name so isl accepts it.
  ParameterName = getIslCompatibleName("", ParameterName, "");

  // The id's user pointer is the SCEV itself, so it can be recovered later.
  isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
                              const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}
2239 void Scop::addParams(const ParameterSetTy &NewParameters) {
2240 for (const SCEV *Parameter : NewParameters) {
2241 // Normalize the SCEV to get the representing element for an invariant load.
2242 Parameter = extractConstantFactor(Parameter, *SE).second;
2243 Parameter = getRepresentingInvariantLoadSCEV(Parameter);
2245 if (Parameters.insert(Parameter))
2246 createParameterId(Parameter);
// Look up the isl::id created for @p Parameter (null id if unknown).
isl::id Scop::getIdForParam(const SCEV *Parameter) const {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return ParameterIds.lookup(Parameter);
}

// Intersect @p C with the parameter constraints under which at least one
// statement instance is executed.
isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains().release());
  return isl::manage(isl_set_intersect_params(C.release(), DomainContext));
}

// True iff @p BB dominates the SCoP's entry block.
bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}
// Harvest llvm.assume calls that affect this SCoP and fold the affine ones
// into the context as additional parameter constraints.
void Scop::addUserAssumptions(
    AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    // An assumption is relevant if it is inside the SCoP or dominates it.
    bool InScop = contains(CI);
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    // Build the condition sets for the assumed value; for out-of-SCoP
    // assumptions the SCoP context stands in for the domain.
    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
    auto *Dom = InScop ? DomainMap[BB].copy() : isl_set_copy(Context);
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
                                    InvalidDomainMap, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    // ConditionSets[0] holds the "assumption true" set, ConditionSets[1] the
    // "assumption false" set; derive the context restriction from them.
    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        // u-- compensates for the dimension just removed.
        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    Context = isl_set_intersect(Context, AssumptionCtx);
  }
}
// Intersect the context with the set provided via -polly-context, after
// validating that its parameter space matches the computed context.
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
  isl_space *Space = getParamSpace().release();
  // Reject a user context with a different number of parameters.
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    free(SpaceStr); // isl_*_to_str results are released with free().
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);

    // Reject if the i-th parameter names disagree.
    if (strcmp(NameContext, NameUserContext) != 0) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    // Names match: copy our dimension id onto the user context so the spaces
    // are identical for the intersection below.
    UserContext =
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
                           isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = isl_set_intersect(Context, UserContext);
  isl_space_free(Space);
}
// Group required invariant loads by (pointer SCEV, type); loads in the same
// class are interchangeable and are mapped to a single representative.
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      // A representative exists already; record the mapping for this load.
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    // First load for this (pointer, type) pair becomes the representative.
    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}
// Initialize the three contexts over an empty (0-parameter) space; the last
// isl call consumes Space, the earlier ones operate on copies.
void Scop::buildContext() {
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
  Context = isl_set_universe(isl_space_copy(Space));
  InvalidContext = isl_set_empty(isl_space_copy(Space));
  AssumedContext = isl_set_universe(Space);
}
// Add the signed ranges ScalarEvolution knows for each parameter as bounds
// on the corresponding context dimension.
void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    // PDim tracks the parameter's dimension index in the context.
    Context =
        addRangeBoundsToSet(give(Context), SRange, PDim++, isl::dim::param)
            .release();
  }
}
2423 static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
2424 std::vector<isl::id> OutermostSizeIds;
2425 for (auto Array : Arrays) {
2426 // To check if an array is a Fortran array, we check if it has a isl_pw_aff
2427 // for its outermost dimension. Fortran arrays will have this since the
2428 // outermost dimension size can be picked up from their runtime description.
2429 // TODO: actually need to check if it has a FAD, but for now this works.
2430 if (Array->getNumberOfDimensions() > 0) {
2431 isl::pw_aff PwAff = Array->getDimensionSizePw(0);
2432 if (!PwAff)
2433 continue;
2435 isl::id Id =
2436 isl::manage(isl_pw_aff_get_dim_id(PwAff.get(), isl_dim_param, 0));
2437 assert(!Id.is_null() &&
2438 "Invalid Id for PwAff expression in Fortran array");
2439 Id.dump();
2440 OutermostSizeIds.push_back(Id);
2443 return OutermostSizeIds;
2446 // The FORTRAN array size parameters are known to be non-negative.
2447 static isl_set *boundFortranArrayParams(__isl_give isl_set *Context,
2448 Scop::array_range Arrays) {
2449 std::vector<isl::id> OutermostSizeIds;
2450 OutermostSizeIds = getFortranArrayIds(Arrays);
2452 for (isl::id Id : OutermostSizeIds) {
2453 int dim = isl_set_find_dim_by_id(Context, isl_dim_param, Id.get());
2454 Context = isl_set_lower_bound_si(Context, isl_dim_param, dim, 0);
2457 return Context;
// Bring all parametric objects (context, statement domains, schedule) into a
// single, shared parameter space and add known parameter bounds.
void Scop::realignParams() {
  // NOTE(review): this early return skips the entire realignment, not only
  // the bound addition below — confirm that is the intended semantics of
  // -polly-ignore-parameter-bounds.
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl::space Space = getFullParamSpace();

  // Align the parameters of all data structures to the model.
  Context = isl_set_align_params(Context, Space.copy());

  // Bound the size of the fortran array dimensions.
  Context = boundFortranArrayParams(Context, arrays());

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();
  // Simplify the schedule according to the context too.
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext().release());
}
// Simplify @p AssumptionContext against the constraints already implied by
// @p S's domains and context.
static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    isl_set *DomainParameters = isl_union_set_params(S.getDomains().release());
    AssumptionContext =
        isl_set_gist_params(AssumptionContext, DomainParameters);
  }

  AssumptionContext =
      isl_set_gist_params(AssumptionContext, S.getContext().release());
  return AssumptionContext;
}
// Simplify the assumed/invalid contexts using the constraints implied by the
// iteration domains and the main context.
void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext =
      isl_set_align_params(InvalidContext, getParamSpace().release());
}
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// Returns isl::stat::error when the computation is considered too expensive
/// (too many disjuncts/parameters) or when isl hits its computation quota.
static isl::stat
buildMinMaxAccess(isl::set Set, Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;
  isl::ctx Ctx = Set.get_ctx();

  Set = Set.remove_divs();

  // Bail out early if the set is already too complex.
  if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain)
    return isl::stat::error;

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return isl::stat::error;
  }

  if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts)
    return isl::stat::error;

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  // isl aborts long-running computations via a quota; detect that here.
  if (isl_ctx_last_error(Ctx.get()) == isl_error_quota)
    return isl::stat::error;

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxPMA.dim(isl::dim::out) && "Assumed at least one output dimension");
  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  MinMaxAccesses.push_back(std::make_pair(MinPMA.copy(), MaxPMA.copy()));

  return isl::stat::ok;
}
// Return @p MA's statement domain with all set dimensions projected out and
// the tuple id reset (i.e. just its parameter constraints, unnamed).
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain().release();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}
/// Wrapper function to calculate minimal/maximal accesses to each array.
///
/// Returns false if any per-set min/max computation failed (see
/// buildMinMaxAccess).
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = S.getDomains();
  isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());

  // Union all access relations of the group.
  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.add_map(give(MA->getAccessRelation().release()));

  // Restrict to actually executed statement instances and take the touched
  // locations.
  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();
  Locations = Locations.coalesce();
  Locations = Locations.detect_equalities();

  auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
    return buildMinMaxAccess(Set, MinMaxAccesses, S);
  };
  return Locations.foreach_set(Lambda) == isl::stat::ok;
}
2631 /// Helper to treat non-affine regions and basic blocks the same.
2633 ///{
2635 /// Return the block that is the representing block for @p RN.
2636 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2637 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2638 : RN->getNodeAs<BasicBlock>();
2641 /// Return the @p idx'th block that is executed after @p RN.
2642 static inline BasicBlock *
2643 getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2644 if (RN->isSubRegion()) {
2645 assert(idx == 0);
2646 return RN->getNodeAs<Region>()->getExit();
2648 return TI->getSuccessor(idx);
/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (i > 1024)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    //
    // NOTE(review): getPrevNode() returns the block preceding BB in the
    // function's block layout; the code assumes this layout neighbor is the
    // relevant predecessor -- confirm for the CFGs of interest.
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  // For a non-affine subregion start from the loop of its entry block and
  // walk upwards until the loop is no longer fully contained in the subregion.
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();
  return L;
}
2687 /// Get the number of blocks in @p L.
2689 /// The number of blocks in a loop are the number of basic blocks actually
2690 /// belonging to the loop, as well as all single basic blocks that the loop
2691 /// exits to and which terminate in an unreachable instruction. We do not
2692 /// allow such basic blocks in the exit of a scop, hence they belong to the
2693 /// scop and represent run-time conditions which we want to model and
2694 /// subsequently speculate away.
2696 /// @see getRegionNodeLoop for additional details.
2697 unsigned getNumBlocksInLoop(Loop *L) {
2698 unsigned NumBlocks = L->getNumBlocks();
2699 SmallVector<BasicBlock *, 4> ExitBlocks;
2700 L->getExitBlocks(ExitBlocks);
2702 for (auto ExitBlock : ExitBlocks) {
2703 if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2704 NumBlocks++;
2706 return NumBlocks;
2709 static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2710 if (!RN->isSubRegion())
2711 return 1;
2713 Region *R = RN->getNodeAs<Region>();
2714 return std::distance(R->block_begin(), R->block_end());
2717 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2718 const DominatorTree &DT) {
2719 if (!RN->isSubRegion())
2720 return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2721 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2722 if (isErrorBlock(*BB, R, LI, DT))
2723 return true;
2724 return false;
2727 ///}
2729 static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
2730 unsigned Dim, Loop *L) {
2731 Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
2732 isl_id *DimId =
2733 isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
2734 return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
// Return the domain of @p Stmt by looking up the domain of its entry block.
isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}
isl::set Scop::getDomainConditions(BasicBlock *BB) const {
  // Fast path: the block has a domain recorded directly.
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return DIt->getSecond();

  // Otherwise @p BB lies inside a non-affine subregion for which only the
  // entry block carries a domain: walk out of every region that @p BB is the
  // entry of and reuse the domain of the enclosing region's entry block.
  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}
bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  // The entry domain is a universe set with one dimension per surrounding
  // loop (LD + 1 dimensions in total).
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));

  // Attach the corresponding loop to each dimension, walking from the
  // innermost dimension outwards (LD is decremented before the body runs, so
  // LD + 1 is the dimension for the current loop L).
  while (LD-- >= 0) {
    S = addDomainDimId(S, LD + 1, L);
    L = L->getParentLoop();
  }

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  DomainMap[EntryBB] = isl::manage(S);

  // A SCoP that is a single non-affine region is modeled with the entry
  // domain only; it is invalid if it contains an error block.
  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
    return false;

  return true;
}
/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible to domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
                                                  __isl_take isl_set *Dom,
                                                  Loop *OldL, Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left were difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    // Replace the dimension of the loop that was left with a fresh dimension
    // for the loop that was entered.
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    // Append a single new dimension for the loop that was entered.
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else {
    assert(OldDepth > NewDepth);
    // Project out the innermost dimensions of all loops that were left.
    int Diff = OldDepth - NewDepth;
    int NumDim = isl_set_n_dim(Dom);
    assert(NumDim >= Diff);
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
  }

  return Dom;
}
bool Scop::propagateInvalidStmtDomains(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    // A block is invalid if it is an error block or if its whole domain is
    // already known to be invalid.
    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      // The block is never validly executed: record a restriction assumption
      // excluding all parameter values that reach it and drop its domain.
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(ERRORBLOCK, DomPar.release(),
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = nullptr;
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    // Propagate the invalid domain of this block to all successors that lie
    // inside the SCoP (skipping backedges).
    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      // The successor may live at a different loop depth, adjust first.
      auto *AdjustedInvalidDomain = adjustDomainDimensions(
          *this, InvalidDomain.copy(), BBLoop, SuccBBLoop);

      auto *SuccInvalidDomain = InvalidDomainMap[SuccBB].copy();
      SuccInvalidDomain =
          isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
      SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
      unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);

      InvalidDomainMap[SuccBB] = isl::manage(SuccInvalidDomain);

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (NumConjucts < MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void Scop::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate it's
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = R.getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  // NOTE(review): the latches are queried from BBLoop on every iteration of
  // this walk over L, not from L itself -- confirm this is intended.
  auto *L = BBLoop;
  while (L && contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = DomainMap[BB];
  assert(Domain && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = isl::manage(
      adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop));
  isl::set &ExitDomain = DomainMap[ExitBB];

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  // Remember that the exit block already received a domain from @p BB.
  FinishedExitBlocks.insert(ExitBB);
}
bool Scop::buildDomainsWithBranchConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
                                               InvalidDomainMap))
          return false;
        continue;
      }
    }

    // Remember for later that this SCoP contains an error block.
    if (containsErrorBlock(RN, getRegion(), LI, DT))
      HasErrorBlock = true;

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    TerminatorInst *TI = BB->getTerminator();

    // An unreachable terminator has no successors to propagate to.
    if (isa<UnreachableInst>(TI))
      continue;

    isl::set Domain = DomainMap.lookup(BB);
    if (!Domain)
      continue;
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
                                 InvalidDomainMap, ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      // The successor may live at a different loop depth, adjust first.
      CondSet = isl::manage(
          adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop));

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = DomainMap[SuccBB];

      if (SuccDomain) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (isl_set_n_basic_set(SuccDomain.get()) < MaxDisjunctsInDomain)
        continue;

      // Free the not yet consumed condition sets before bailing out.
      invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
3101 isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
3102 DominatorTree &DT,
3103 LoopInfo &LI) {
3104 // If @p BB is the ScopEntry we are done
3105 if (R.getEntry() == BB)
3106 return isl::set::universe(Domain.get_space());
3108 // The region info of this function.
3109 auto &RI = *R.getRegionInfo();
3111 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());
3113 // A domain to collect all predecessor domains, thus all conditions under
3114 // which the block is executed. To this end we start with the empty domain.
3115 isl::set PredDom = isl::set::empty(Domain.get_space());
3117 // Set of regions of which the entry block domain has been propagated to BB.
3118 // all predecessors inside any of the regions can be skipped.
3119 SmallSet<Region *, 8> PropagatedRegions;
3121 for (auto *PredBB : predecessors(BB)) {
3122 // Skip backedges.
3123 if (DT.dominates(BB, PredBB))
3124 continue;
3126 // If the predecessor is in a region we used for propagation we can skip it.
3127 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
3128 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
3129 PredBBInRegion)) {
3130 continue;
3133 // Check if there is a valid region we can use for propagation, thus look
3134 // for a region that contains the predecessor and has @p BB as exit block.
3135 auto *PredR = RI.getRegionFor(PredBB);
3136 while (PredR->getExit() != BB && !PredR->contains(BB))
3137 PredR->getParent();
3139 // If a valid region for propagation was found use the entry of that region
3140 // for propagation, otherwise the PredBB directly.
3141 if (PredR->getExit() == BB) {
3142 PredBB = PredR->getEntry();
3143 PropagatedRegions.insert(PredR);
3146 auto *PredBBDom = getDomainConditions(PredBB).release();
3147 Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
3149 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
3151 PredDom = PredDom.unite(isl::manage(PredBBDom));
3154 return PredDom;
bool Scop::propagateDomainConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constrains from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(getParamSpace());

    // Add loop-carried constraints if this block is a loop header of a loop
    // contained in the SCoP.
    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
        return false;
  }

  return true;
}
3200 /// Create a map to map from a given iteration to a subsequent iteration.
3202 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
3203 /// is incremented by one and all other dimensions are equal, e.g.,
3204 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
3206 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
3207 static __isl_give isl_map *
3208 createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
3209 auto *MapSpace = isl_space_map_from_set(SetSpace);
3210 auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
3211 for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
3212 if (u != Dim)
3213 NextIterationMap =
3214 isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
3215 auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
3216 C = isl_constraint_set_constant_si(C, 1);
3217 C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
3218 C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
3219 NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
3220 return NextIterationMap;
3223 bool Scop::addLoopBoundsToHeaderDomain(
3224 Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
3225 int LoopDepth = getRelativeLoopDepth(L);
3226 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
3228 BasicBlock *HeaderBB = L->getHeader();
3229 assert(DomainMap.count(HeaderBB));
3230 isl::set &HeaderBBDom = DomainMap[HeaderBB];
3232 isl::map NextIterationMap = isl::manage(
3233 createNextIterationMap(HeaderBBDom.get_space().release(), LoopDepth));
3235 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
3237 SmallVector<BasicBlock *, 4> LatchBlocks;
3238 L->getLoopLatches(LatchBlocks);
3240 for (BasicBlock *LatchBB : LatchBlocks) {
3241 // If the latch is only reachable via error statements we skip it.
3242 isl::set LatchBBDom = DomainMap.lookup(LatchBB);
3243 if (!LatchBBDom)
3244 continue;
3246 isl::set BackedgeCondition = nullptr;
3248 TerminatorInst *TI = LatchBB->getTerminator();
3249 BranchInst *BI = dyn_cast<BranchInst>(TI);
3250 assert(BI && "Only branch instructions allowed in loop latches");
3252 if (BI->isUnconditional())
3253 BackedgeCondition = LatchBBDom;
3254 else {
3255 SmallVector<isl_set *, 8> ConditionSets;
3256 int idx = BI->getSuccessor(0) != HeaderBB;
3257 if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
3258 InvalidDomainMap, ConditionSets))
3259 return false;
3261 // Free the non back edge condition set as we do not need it.
3262 isl_set_free(ConditionSets[1 - idx]);
3264 BackedgeCondition = isl::manage(ConditionSets[idx]);
3267 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
3268 assert(LatchLoopDepth >= LoopDepth);
3269 BackedgeCondition = BackedgeCondition.project_out(
3270 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
3271 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
3274 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
3275 for (int i = 0; i < LoopDepth; i++)
3276 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
3278 isl::set UnionBackedgeConditionComplement =
3279 UnionBackedgeCondition.complement();
3280 UnionBackedgeConditionComplement =
3281 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
3283 UnionBackedgeConditionComplement =
3284 UnionBackedgeConditionComplement.apply(ForwardMap);
3285 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
3286 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
3288 auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth);
3289 HeaderBBDom = isl::manage(Parts.second);
3291 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
3292 // the bounded assumptions to the context as they are already implied by the
3293 // <nsw> tag.
3294 if (Affinator.hasNSWAddRecForLoop(L)) {
3295 isl_set_free(Parts.first);
3296 return true;
3299 isl_set *UnboundedCtx = isl_set_params(Parts.first);
3300 recordAssumption(INFINITELOOP, UnboundedCtx,
3301 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
3302 return true;
3305 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
3306 Value *PointerBase = MA->getOriginalBaseAddr();
3308 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
3309 if (!PointerBaseInst)
3310 return nullptr;
3312 auto *BasePtrStmt = getStmtFor(PointerBaseInst);
3313 if (!BasePtrStmt)
3314 return nullptr;
3316 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
3319 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
3320 isl::union_map Writes) {
3321 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
3322 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
3325 Value *BaseAddr = MA->getOriginalBaseAddr();
3326 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
3327 if (!isa<LoadInst>(BasePtrInst))
3328 return contains(BasePtrInst);
3330 return false;
bool Scop::buildAliasChecks(AliasAnalysis &AA) {
  // Without run-time alias checks there is nothing to build.
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups(AA)) {
    // Aliasing assumptions do not go through addAssumption but we still want to
    // collect statistics so we do it here explicitly.
    if (MinMaxAliasGroups.size())
      AssumptionsAliasing++;
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  invalidate(ALIASING, DebugLoc());

  DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
               << " could not be created as the number of parameters involved "
                  "is too high. The SCoP will be "
                  "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                  "the maximal number of parameters but be advised that the "
                  "compile time might increase exponentially.\n\n");
  return false;
}
std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *this) {

    isl_set *StmtDomain = Stmt.getDomain().release();
    bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
    isl_set_free(StmtDomain);

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      // Scalar accesses do not touch memory and cannot alias.
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      // The read side of a memory transfer is keyed by its raw source pointer.
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    // Must-alias sets and forwarding sets need no run-time check.
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    // A group with a single access has nothing to alias with.
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  // Split each alias group by execution domain: accesses whose domains are
  // disjoint from the rest of the group can never be executed together and
  // are moved into a new group, which is in turn split again by this loop.
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl_set *AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl_set *MADomain = getAccessDomain(MA);
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
        // Disjoint from everything accumulated so far: move to the new group.
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
        isl_set_free(MADomain);
      } else {
        // Overlaps: keep it and grow the accumulated domain.
        AGDomain = isl_set_union(AGDomain, MADomain);
        AGI++;
      }
    }
    // Groups of size one need no alias check and are dropped.
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
    isl_set_free(AGDomain);
  }
}
bool Scop::buildAliasGroups(AliasAnalysis &AA) {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute minimal
  //      and maximal accesses to each array of a group in read only and non
  //      read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!hasFeasibleRuntimeContext())
      return false;

    {
      // Bound the isl operations spent on this group; the guard resets the
      // limit when it goes out of scope.
      IslMaxOperationsGuard MaxOpGuard(getIslCtx(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }

    // Bail if the operation quota was exhausted while building the group.
    if (isl_ctx_last_error(getIslCtx()) == isl_error_quota) {
      invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}
bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  // A single access cannot alias with anything else in the group.
  if (AliasGroup.size() < 2)
    return true;

  // Partition the group into read-only and read-write accesses/arrays and
  // remark each potentially aliasing pointer.
  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write pointers,
  // no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                 MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
      addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  MinMaxAliasGroups.emplace_back();
  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;

  bool Valid;

  Valid =
      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid =
      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);

  if (!Valid)
    return false;

  return true;
}
3540 /// Get the smallest loop that contains @p S but is not in @p S.
///
/// @returns The surrounding loop, or nullptr if the SCoP is not nested in
///          any loop.
3541 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3542 // Start with the smallest loop containing the entry and expand that
3543 // loop until it contains all blocks in the region. If there is a loop
3544 // containing all blocks in the region check if it is itself contained
3545 // and if so take the parent loop as it will be the smallest containing
3546 // the region but not contained by it.
3547 Loop *L = LI.getLoopFor(S.getEntry());
3548 while (L) {
3549 bool AllContained = true;
3550 for (auto *BB : S.blocks())
3551 AllContained &= L->contains(BB);
3552 if (AllContained)
3553 break;
3554 L = L->getParentLoop();
// If the found loop lies completely inside the SCoP, its parent is the
// smallest loop surrounding (but not inside) the SCoP.
3557 return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
// SCoP IDs are handed out sequentially per function; see getNextID().
3560 int Scop::NextScopID = 0;
// Name of the function whose SCoPs are currently being numbered.
3562 std::string Scop::CurrentFunc;
/// Return the next unique SCoP ID, restarting the counter at 0 whenever a
/// different function is entered.
3564 int Scop::getNextID(std::string ParentFunc) {
3565 if (ParentFunc != CurrentFunc) {
3566 CurrentFunc = ParentFunc;
3567 NextScopID = 0;
3569 return NextScopID++;
/// Construct a SCoP for region @p R. Allocates a fresh isl context (owned
/// via the IslCtx unique_ptr-style member) and builds the initial context.
/// Note: HasSingleExitEdge is derived from R.getExitingBlock() being
/// non-null, i.e. the region having a single exiting block.
3572 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3573 ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE)
3574 : SE(&ScalarEvolution), R(R), name(R.getNameStr()),
3575 HasSingleExitEdge(R.getExitingBlock()), DC(DC), ORE(ORE),
3576 IslCtx(isl_ctx_alloc(), isl_ctx_free), Affinator(this, LI),
3577 ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
// For debugging: make isl abort at the point an error occurs.
3578 if (IslOnErrorAbort)
3579 isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
3580 buildContext();
/// Destructor: free every isl object owned by the SCoP before the isl
/// context itself is released (by the IslCtx member's deleter).
3583 Scop::~Scop() {
3584 isl_set_free(Context);
3585 isl_set_free(AssumedContext);
3586 isl_set_free(InvalidContext);
3587 isl_schedule_free(Schedule);
3589 ParameterIds.clear();
// Assumptions that were recorded but never materialized still own sets.
3591 for (auto &AS : RecordedAssumptions)
3592 isl_set_free(AS.Set);
3594 // Free the alias groups
3595 for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
3596 for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
3597 isl_pw_multi_aff_free(MMA.first);
3598 isl_pw_multi_aff_free(MMA.second);
3600 for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
3601 isl_pw_multi_aff_free(MMA.first);
3602 isl_pw_multi_aff_free(MMA.second);
3606 for (const auto &IAClass : InvariantEquivClasses)
3607 isl_set_free(IAClass.ExecutionContext);
3609 // Explicitly release all Scop objects and the underlying isl objects before
3610 // we release the isl context.
3611 Stmts.clear();
3612 ScopArrayInfoSet.clear();
3613 ScopArrayInfoMap.clear();
3614 ScopArrayNameMap.clear();
3615 AccessFunctions.clear();
/// Fold constant strides found in outer array dimensions into the size of
/// the innermost (rightmost) dimension, rewriting all affected access
/// relations accordingly.
///
/// For every multi-dimensional array we inspect the accessed elements of
/// each dimension separately: if the affine hull of a dimension exposes a
/// single division, its denominator is the constant stride; a dimension only
/// accessed at offset zero is marked with factor 0. Folding only happens if
/// all dimensions agree on one factor greater than one.
3618 void Scop::foldSizeConstantsToRight() {
3619 isl_union_set *Accessed = isl_union_map_range(getAccesses().release());
3621 for (auto Array : arrays()) {
// One-dimensional arrays have no outer dimension to fold from.
3622 if (Array->getNumberOfDimensions() <= 1)
3623 continue;
3625 isl_space *Space = Array->getSpace().release();
3627 Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
// Skip arrays that are never accessed.
3629 if (!isl_union_set_contains(Accessed, Space)) {
3630 isl_space_free(Space);
3631 continue;
3634 isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
// Transform maps old array indices to folded ones; constraints are added
// per dimension below.
3636 isl_map *Transform =
3637 isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));
// Int[i] holds the detected stride factor of dimension i
// (1 = unknown/unit, 0 = only offset zero accessed).
3639 std::vector<int> Int;
3641 int Dims = isl_set_dim(Elements, isl_dim_set);
3642 for (int i = 0; i < Dims; i++) {
// Project onto dimension i only and anchor it at zero before taking the
// affine hull.
3643 isl_set *DimOnly =
3644 isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3645 DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3646 DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3648 isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
// The innermost dimension is kept unchanged; it receives the folded
// factor later via its size.
3650 if (i == Dims - 1) {
3651 Int.push_back(1);
3652 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3653 isl_basic_set_free(DimHull);
3654 continue;
// A single existential (div) in the hull exposes the stride as the
// denominator of the division.
3657 if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3658 isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3659 isl_val *Val = isl_aff_get_denominator_val(Diff);
3660 isl_aff_free(Diff);
3662 int ValInt = 1;
3664 if (isl_val_is_int(Val))
3665 ValInt = isl_val_get_num_si(Val);
3666 isl_val_free(Val);
3668 Int.push_back(ValInt);
// Constrain the transform: out_i * ValInt == in_i, i.e. divide the old
// index by the stride.
3670 isl_constraint *C = isl_constraint_alloc_equality(
3671 isl_local_space_from_space(isl_map_get_space(Transform)));
3672 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3673 C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3674 Transform = isl_map_add_constraint(Transform, C);
3675 isl_basic_set_free(DimHull);
3676 continue;
// Otherwise check whether the dimension is only ever accessed at zero.
3679 isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3680 ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3682 int ValInt = 1;
3683 if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3684 ValInt = 0;
3687 Int.push_back(ValInt);
3688 Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3689 isl_basic_set_free(DimHull);
3690 isl_basic_set_free(ZeroSet);
// The transform must cover every accessed element, otherwise folding
// would lose accesses.
3693 isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3695 if (!isl_set_is_subset(Elements, MappedElements)) {
3696 isl_set_free(Elements);
3697 isl_set_free(MappedElements);
3698 isl_map_free(Transform);
3699 continue;
3702 isl_set_free(MappedElements);
// Folding is only sound if there is a common factor > 1 shared by all
// dimensions that exposed one (factor 0 dimensions are compatible).
3704 bool CanFold = true;
3706 if (Int[0] <= 1)
3707 CanFold = false;
3709 unsigned NumDims = Array->getNumberOfDimensions();
3710 for (unsigned i = 1; i < NumDims - 1; i++)
3711 if (Int[0] != Int[i] && Int[i])
3712 CanFold = false;
3714 if (!CanFold) {
3715 isl_set_free(Elements);
3716 isl_map_free(Transform);
3717 continue;
// Rewrite every access to this array through the transform.
3720 for (auto &Access : AccessFunctions)
3721 if (Access->getScopArrayInfo() == Array)
3722 Access->setAccessRelation(Access->getAccessRelation().apply_range(
3723 isl::manage(isl_map_copy(Transform))));
3725 isl_map_free(Transform);
// Multiply the folded factor into the innermost dimension's size.
3727 std::vector<const SCEV *> Sizes;
3728 for (unsigned i = 0; i < NumDims; i++) {
3729 auto Size = Array->getDimensionSize(i);
3731 if (i == NumDims - 1)
3732 Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3733 Sizes.push_back(Size);
3736 Array->updateSizes(Sizes, false /* CheckConsistency */);
3738 isl_set_free(Elements);
3740 isl_union_set_free(Accessed);
/// Attach Fortran array descriptors to the ScopArrayInfo objects of all
/// memory accesses that carry one.
3743 void Scop::markFortranArrays() {
3744 for (ScopStmt &Stmt : Stmts) {
3745 for (MemoryAccess *MemAcc : Stmt) {
3746 Value *FAD = MemAcc->getFortranArrayDescriptor();
3747 if (!FAD)
3748 continue;
3750 // TODO: const_cast-ing to edit
3751 ScopArrayInfo *SAI =
3752 const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
3753 assert(SAI && "memory access into a Fortran array does not "
3754 "have an associated ScopArrayInfo");
3755 SAI->applyAndSetFAD(FAD);
/// Run the post-construction access pipeline. The order is significant:
/// dimensionality is fixed first, constant size factors are folded before
/// access relations are folded, and Fortran arrays are marked last.
3760 void Scop::finalizeAccesses() {
3761 updateAccessDimensionality();
3762 foldSizeConstantsToRight();
3763 foldAccessRelations();
3764 assumeNoOutOfBounds();
3765 markFortranArrays();
/// Shrink the element type of single-dimensional arrays to the largest
/// power-of-two size that divides all subscripts, then update each access's
/// dimensionality.
3768 void Scop::updateAccessDimensionality() {
3769 // Check all array accesses for each base pointer and find a (virtual) element
3770 // size for the base pointer that divides all access functions.
3771 for (ScopStmt &Stmt : *this)
3772 for (MemoryAccess *Access : Stmt) {
3773 if (!Access->isArrayKind())
3774 continue;
3775 ScopArrayInfo *Array =
3776 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3778 if (Array->getNumberOfDimensions() != 1)
3779 continue;
// Halve the candidate element size until it divides the subscript.
3780 unsigned DivisibleSize = Array->getElemSizeInBytes();
3781 const SCEV *Subscript = Access->getSubscript(0);
3782 while (!isDivisible(Subscript, DivisibleSize, *SE))
3783 DivisibleSize /= 2;
3784 auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3785 Array->updateElementType(Ty);
3788 for (auto &Stmt : *this)
3789 for (auto &Access : Stmt)
3790 Access->updateDimensionality();
/// Fold the access relation of every memory access in the SCoP.
3793 void Scop::foldAccessRelations() {
3794 for (auto &Stmt : *this)
3795 for (auto &Access : Stmt)
3796 Access->foldAccessRelation();
/// Record the assumption that every memory access stays in bounds.
3799 void Scop::assumeNoOutOfBounds() {
3800 for (auto &Stmt : *this)
3801 for (auto &Access : Stmt)
3802 Access->assumeNoOutOfBound();
/// Remove @p Stmt from the block and instruction lookup maps. Region
/// statements cover multiple basic blocks, so all of them are erased.
3805 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
3806 if (Stmt.isRegionStmt())
3807 for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
3808 StmtMap.erase(BB);
3809 for (Instruction &Inst : *BB)
3810 InstStmtMap.erase(&Inst);
3812 else {
3813 StmtMap.erase(Stmt.getBasicBlock());
3814 for (Instruction *Inst : Stmt.getInstructions())
3815 InstStmtMap.erase(Inst);
/// Erase every statement for which @p ShouldDelete returns true, keeping the
/// lookup maps in sync.
3819 void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete) {
3820 for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3821 if (!ShouldDelete(*StmtIt)) {
3822 StmtIt++;
3823 continue;
3826 removeFromStmtMap(*StmtIt);
3827 StmtIt = Stmts.erase(StmtIt);
/// Drop statements whose entry block has no entry in the domain map, i.e.
/// statements for which no domain could be derived.
3831 void Scop::removeStmtNotInDomainMap() {
3832 auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
3833 return !this->DomainMap.lookup(Stmt.getEntryBlock());
3835 removeStmts(ShouldDelete);
/// Remove statements that carry no information: empty statements always,
/// and — once invariant load hoisting has run — statements that only read.
///
/// @param AfterHoisting Whether invariant load hoisting already happened;
///        read-only statements must not be removed before it, as their
///        loads may still be hoisted.
3838 void Scop::simplifySCoP(bool AfterHoisting) {
3839 auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
3840 bool RemoveStmt = Stmt.isEmpty();
3842 // Remove read only statements only after invariant load hoisting.
3843 if (!RemoveStmt && AfterHoisting) {
3844 bool OnlyRead = true;
3845 for (MemoryAccess *MA : Stmt) {
3846 if (MA->isRead())
3847 continue;
3849 OnlyRead = false;
3850 break;
3853 RemoveStmt = OnlyRead;
3855 return RemoveStmt;
3858 removeStmts(ShouldDelete);
/// Find the invariant-load equivalence class that contains @p Val, or
/// nullptr if @p Val is not an invariant load of this SCoP.
3861 InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3862 LoadInst *LInst = dyn_cast<LoadInst>(Val);
3863 if (!LInst)
3864 return nullptr;
// The load may have been replaced by the class representative during
// consolidation; look it up first.
3866 if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3867 LInst = cast<LoadInst>(Rep);
3869 Type *Ty = LInst->getType();
3870 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3871 for (auto &IAClass : InvariantEquivClasses) {
// Classes are keyed by (pointer SCEV, loaded type).
3872 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3873 continue;
3875 auto &MAs = IAClass.InvariantAccesses;
3876 for (auto *MA : MAs)
3877 if (MA->getAccessInstruction() == Val)
3878 return &IAClass;
3881 return nullptr;
/// Check whether @p maybeParam is one of the formal arguments of @p F.
3884 bool isAParameter(llvm::Value *maybeParam, const Function &F) {
3885 for (const llvm::Argument &Arg : F.args())
3886 if (&Arg == maybeParam)
3887 return true;
3889 return false;
/// Decide whether the invariant load @p MA may be hoisted unconditionally,
/// i.e. executed speculatively without a guarding execution context.
///
/// @param StmtInvalidCtxIsEmpty  The statement's invalid context is empty.
/// @param MAInvalidCtxIsEmpty    The access's invalid context is empty.
/// @param NonHoistableCtxIsEmpty The location is never overwritten.
3892 bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3893 bool MAInvalidCtxIsEmpty,
3894 bool NonHoistableCtxIsEmpty) {
3895 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3896 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
// Under this option, loads from function parameters are assumed safe.
3897 if (PollyAllowDereferenceOfAllFunctionParams &&
3898 isAParameter(LInst->getPointerOperand(), getFunction()))
3899 return true;
3901 // TODO: We can provide more information for better but more expensive
3902 // results.
3903 if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3904 LInst->getAlignment(), DL))
3905 return false;
3907 // If the location might be overwritten we do not hoist it unconditionally.
3909 // TODO: This is probably too conservative.
3910 if (!NonHoistableCtxIsEmpty)
3911 return false;
3913 // If a dereferenceable load is in a statement that is modeled precisely we
3914 // can hoist it.
3915 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3916 return true;
3918 // Even if the statement is not modeled precisely we can hoist the load if it
3919 // does not involve any parameters that might have been specialized by the
3920 // statement domain.
3921 for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3922 if (!isa<SCEVConstant>(MA->getSubscript(u)))
3923 return false;
3924 return true;
/// Transfer the hoisted invariant accesses @p InvMAs of @p Stmt into the
/// SCoP-wide invariant-load equivalence classes, consolidating loads of the
/// same location and unifying their execution contexts.
3927 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3928 if (InvMAs.empty())
3929 return;
3931 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3932 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3934 // Get the context under which the statement is executed but remove the error
3935 // context under which this statement is reached.
3936 isl::set DomainCtx = Stmt.getDomain().params();
3937 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
// Too many disjuncts would make the preload guard too expensive; give up.
3939 if (isl_set_n_basic_set(DomainCtx.get()) >= MaxDisjunctsInDomain) {
3940 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3941 invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3942 return;
3945 // Project out all parameters that relate to loads in the statement. Otherwise
3946 // we could have cyclic dependences on the constraints under which the
3947 // hoisted loads are executed and we could not determine an order in which to
3948 // pre-load them. This happens because not only lower bounds are part of the
3949 // domain but also upper bounds.
3950 for (auto &InvMA : InvMAs) {
3951 auto *MA = InvMA.MA;
3952 Instruction *AccInst = MA->getAccessInstruction();
3953 if (SE->isSCEVable(AccInst->getType())) {
3954 SetVector<Value *> Values;
3955 for (const SCEV *Parameter : Parameters) {
3956 Values.clear();
3957 findValues(Parameter, *SE, Values);
3958 if (!Values.count(AccInst))
3959 continue;
3961 if (isl::id ParamId = getIdForParam(Parameter)) {
3962 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3963 if (Dim >= 0)
3964 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3970 for (auto &InvMA : InvMAs) {
3971 auto *MA = InvMA.MA;
3972 isl::set NHCtx = InvMA.NonHoistableCtx;
3974 // Check for another invariant access that accesses the same location as
3975 // MA and if found consolidate them. Otherwise create a new equivalence
3976 // class at the end of InvariantEquivClasses.
3977 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3978 Type *Ty = LInst->getType();
3979 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3981 isl::set MAInvalidCtx = MA->getInvalidContext();
3982 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3983 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
// MACtx is the execution context of the hoisted load: universal if it can
// be speculated, otherwise the domain minus invalid/non-hoistable parts.
3985 isl::set MACtx;
3986 // Check if we know that this pointer can be speculatively accessed.
3987 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3988 NonHoistableCtxIsEmpty)) {
3989 MACtx = isl::set::universe(DomainCtx.get_space());
3990 } else {
3991 MACtx = DomainCtx;
3992 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3993 MACtx = MACtx.gist_params(getContext());
3996 bool Consolidated = false;
3997 for (auto &IAClass : InvariantEquivClasses) {
3998 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3999 continue;
4001 // If the pointer and the type is equal check if the access function wrt.
4002 // to the domain is equal too. It can happen that the domain fixes
4003 // parameter values and these can be different for distinct part of the
4004 // SCoP. If this happens we cannot consolidate the loads but need to
4005 // create a new invariant load equivalence class.
4006 auto &MAs = IAClass.InvariantAccesses;
4007 if (!MAs.empty()) {
4008 auto *LastMA = MAs.front();
4010 isl::set AR = MA->getAccessRelation().range();
4011 isl::set LastAR = LastMA->getAccessRelation().range();
4012 bool SameAR = AR.is_equal(LastAR);
4014 if (!SameAR)
4015 continue;
4018 // Add MA to the list of accesses that are in this class.
4019 MAs.push_front(MA);
4021 Consolidated = true;
4023 // Unify the execution context of the class and this statement.
4024 isl::set IAClassDomainCtx = isl::manage(IAClass.ExecutionContext);
4025 if (IAClassDomainCtx)
4026 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
4027 else
4028 IAClassDomainCtx = MACtx;
4029 IAClass.ExecutionContext = IAClassDomainCtx.release();
4030 break;
4033 if (Consolidated)
4034 continue;
4036 // If we did not consolidate MA, thus did not find an equivalence class
4037 // for it, we create a new one.
4038 InvariantEquivClasses.emplace_back(InvariantEquivClassTy{
4039 PointerSCEV, MemoryAccessList{MA}, MACtx.release(), Ty});
4043 /// Check if an access range is too complex.
4045 /// An access range is too complex, if it contains either many disjuncts or
4046 /// very complex expressions. As a simple heuristic, we assume if a set to
4047 /// be too complex if the sum of existentially quantified dimensions and
4048 /// set dimensions is larger than a threshold. This reliably detects both
4049 /// sets with many disjuncts as well as sets with many divisions as they
4050 /// arise in h264.
4052 /// @param AccessRange The range to check for complexity.
4054 /// @returns True if the access range is too complex.
4055 static bool isAccessRangeTooComplex(isl::set AccessRange) {
4056 unsigned NumTotalDims = 0;
// Sum div (existential) and set dimensions over all disjuncts.
4058 auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
4059 NumTotalDims += BSet.dim(isl::dim::div);
4060 NumTotalDims += BSet.dim(isl::dim::set);
4061 return isl::stat::ok;
4064 AccessRange.foreach_basic_set(CountDimensions);
// MaxDimensionsInAccessRange is the command-line configurable threshold.
4066 if (NumTotalDims > MaxDimensionsInAccessRange)
4067 return true;
4069 return false;
/// Compute the context under which @p Access is NOT invariant (i.e. its
/// location is written), or a null set if the access cannot be hoisted at
/// all. An empty (but non-null) result means the load is never overwritten
/// and thus unconditionally invariant.
4072 isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
4073 // TODO: Loads that are not loop carried, hence are in a statement with
4074 // zero iterators, are by construction invariant, though we
4075 // currently "hoist" them anyway. This is necessary because we allow
4076 // them to be treated as parameters (e.g., in conditions) and our code
4077 // generation would otherwise use the old value.
4079 auto &Stmt = *Access->getStatement();
4080 BasicBlock *BB = Stmt.getEntryBlock();
// Only affine read accesses of plain loads are hoisting candidates.
4082 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
4083 Access->isMemoryIntrinsic())
4084 return nullptr;
4086 // Skip accesses that have an invariant base pointer which is defined but
4087 // not loaded inside the SCoP. This can happened e.g., if a readnone call
4088 // returns a pointer that is used as a base address. However, as we want
4089 // to hoist indirect pointers, we allow the base pointer to be defined in
4090 // the region if it is also a memory access. Each ScopArrayInfo object
4091 // that has a base pointer origin has a base pointer that is loaded and
4092 // that it is invariant, thus it will be hoisted too. However, if there is
4093 // no base pointer origin we check that the base pointer is defined
4094 // outside the region.
4095 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
4096 if (hasNonHoistableBasePtrInScop(Access, Writes))
4097 return nullptr;
4099 isl::map AccessRelation = give(Access->getAccessRelation().release());
4100 assert(!AccessRelation.is_empty());
// A relation that depends on the loop iterators is not invariant.
4102 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
4103 return nullptr;
4105 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
4106 isl::set SafeToLoad;
4108 auto &DL = getFunction().getParent()->getDataLayout();
4109 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
4110 DL)) {
4111 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
4112 } else if (BB != LI->getParent()) {
4113 // Skip accesses in non-affine subregions as they might not be executed
4114 // under the same condition as the entry of the non-affine subregion.
4115 return nullptr;
4116 } else {
4117 SafeToLoad = AccessRelation.range();
4120 if (isAccessRangeTooComplex(AccessRelation.range()))
4121 return nullptr;
// Parameter context under which the accessed range is (also) written.
4123 isl::union_map Written = Writes.intersect_range(SafeToLoad);
4124 isl::set WrittenCtx = Written.params();
4125 bool IsWritten = !WrittenCtx.is_empty();
// Never written: empty context, unconditionally hoistable.
4127 if (!IsWritten)
4128 return WrittenCtx;
4130 WrittenCtx = WrittenCtx.remove_divs();
4131 bool TooComplex =
4132 isl_set_n_basic_set(WrittenCtx.get()) >= MaxDisjunctsInDomain;
// Only required invariant loads justify restricting the context further.
4133 if (TooComplex || !isRequiredInvariantLoad(LI))
4134 return nullptr;
4136 addAssumption(INVARIANTLOAD, WrittenCtx.copy(), LI->getDebugLoc(),
4137 AS_RESTRICTION, LI->getParent());
4138 return WrittenCtx;
/// Verify that every required invariant load was actually hoisted; if one is
/// still accessed by a statement, invalidate the SCoP.
4141 void Scop::verifyInvariantLoads() {
4142 auto &RIL = getRequiredInvariantLoads();
4143 for (LoadInst *LI : RIL) {
4144 assert(LI && contains(LI));
4145 // If there exists a statement in the scop which has a memory access for
4146 // @p LI, then mark this scop as infeasible for optimization.
4147 for (ScopStmt &Stmt : Stmts)
4148 if (Stmt.getArrayAccessOrNULLFor(LI)) {
4149 invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
4150 return;
/// Hoist invariant loads out of their statements into SCoP-level invariant
/// equivalence classes (no-op unless -polly-invariant-load-hoisting).
4155 void Scop::hoistInvariantLoads() {
4156 if (!PollyInvariantLoadHoisting)
4157 return;
4159 isl::union_map Writes = getWrites();
4160 for (ScopStmt &Stmt : *this) {
4161 InvariantAccessesTy InvariantAccesses;
// A non-null context from getNonHoistableCtx marks a hoistable access.
4163 for (MemoryAccess *Access : Stmt)
4164 if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
4165 InvariantAccesses.push_back({Access, NHCtx});
4167 // Transfer the memory access from the statement to the SCoP.
4168 for (auto InvMA : InvariantAccesses)
4169 Stmt.removeMemoryAccess(InvMA.MA);
4170 addInvariantLoads(Stmt, InvariantAccesses);
4174 /// Find the canonical scop array info object for a set of invariant load
4175 /// hoisted loads. The canonical array is the one that corresponds to the
4176 /// first load in the list of accesses which is used as base pointer of a
4177 /// scop array.
///
/// @returns The canonical array, or nullptr if none of the loads serves as
///          the base pointer of a MemoryKind::Array array.
4178 static const ScopArrayInfo *findCanonicalArray(Scop *S,
4179 MemoryAccessList &Accesses) {
4180 for (MemoryAccess *Access : Accesses) {
4181 const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
4182 Access->getAccessInstruction(), MemoryKind::Array);
4183 if (CanonicalArray)
4184 return CanonicalArray;
4186 return nullptr;
4189 /// Check if @p Array serves as base array in an invariant load.
4190 static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
4191 for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
4192 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
4193 if (Access2->getScopArrayInfo() == Array)
4194 return true;
4195 return false;
4198 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
4199 /// with a reference to @p New.
4200 static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
4201 const ScopArrayInfo *New) {
4202 for (ScopStmt &Stmt : *S)
4203 for (MemoryAccess *Access : Stmt) {
4204 if (Access->getLatestScopArrayInfo() != Old)
4205 continue;
// Redirect the access relation's output tuple to the new array's id.
4207 isl::id Id = New->getBasePtrId();
4208 isl::map Map = Access->getAccessRelation();
4209 Map = Map.set_tuple_id(isl::dim::out, Id);
4210 Access->setAccessRelation(Map);
/// For each invariant-load equivalence class, redirect all compatible arrays
/// whose base pointer is one of the hoisted loads to a single canonical
/// ScopArrayInfo object.
4214 void Scop::canonicalizeDynamicBasePtrs() {
4215 for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
4216 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
4218 const ScopArrayInfo *CanonicalBasePtrSAI =
4219 findCanonicalArray(this, BasePtrAccesses);
4221 if (!CanonicalBasePtrSAI)
4222 continue;
4224 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
4225 const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
4226 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
4227 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
4228 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
4229 continue;
4231 // we currently do not canonicalize arrays where some accesses are
4232 // hoisted as invariant loads. If we would, we need to update the access
4233 // function of the invariant loads as well. However, as this is not a
4234 // very common situation, we leave this for now to avoid further
4235 // complexity increases.
4236 if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
4237 continue;
4239 replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
/// Return the ScopArrayInfo for (@p BasePtr, @p Kind) — or for @p BaseName
/// when no base pointer exists — creating it on first use. Exactly one of
/// BasePtr/BaseName must be set. On a size mismatch with an existing array
/// the SCoP is invalidated (DELINEARIZATION).
4244 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
4245 ArrayRef<const SCEV *> Sizes,
4246 MemoryKind Kind,
4247 const char *BaseName) {
4248 assert((BasePtr || BaseName) &&
4249 "BasePtr and BaseName can not be nullptr at the same time.");
4250 assert(!(BasePtr && BaseName) && "BaseName is redundant.");
4251 auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
4252 : ScopArrayNameMap[BaseName];
4253 if (!SAI) {
4254 auto &DL = getFunction().getParent()->getDataLayout();
4255 SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
4256 DL, this, BaseName));
4257 ScopArrayInfoSet.insert(SAI.get());
4258 } else {
4259 SAI->updateElementType(ElementType);
4260 // In case of mismatching array sizes, we bail out by setting the run-time
4261 // context to false.
4262 if (!SAI->updateSizes(Sizes))
4263 invalidate(DELINEARIZATION, DebugLoc());
4265 return SAI.get();
/// Create a named array with the given constant dimension sizes; a size of 0
/// is translated to a nullptr SCEV, i.e. an unknown dimension size.
4268 ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
4269 const std::string &BaseName,
4270 const std::vector<unsigned> &Sizes) {
4271 auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
4272 std::vector<const SCEV *> SCEVSizes;
4274 for (auto size : Sizes)
4275 if (size)
4276 SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
4277 else
4278 SCEVSizes.push_back(nullptr);
4280 auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
4281 MemoryKind::Array, BaseName.c_str());
4282 return SAI;
/// Look up the array for (@p BasePtr, @p Kind); returns nullptr if unknown.
/// Note: operator[] default-inserts an empty slot for an unknown key.
4285 const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
4286 MemoryKind Kind) {
4287 auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
4288 return SAI;
/// Look up the array for (@p BasePtr, @p Kind); asserts that it exists.
4291 const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
4292 auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
4293 assert(SAI && "No ScopArrayInfo available for this base pointer");
4294 return SAI;
// Textual representations of the various context sets and the SCoP name;
// used for printing and debugging.
4297 std::string Scop::getContextStr() const { return getContext().to_str(); }
4299 std::string Scop::getAssumedContextStr() const {
4300 assert(AssumedContext && "Assumed context not yet built");
4301 return stringFromIslObj(AssumedContext);
4304 std::string Scop::getInvalidContextStr() const {
4305 return stringFromIslObj(InvalidContext);
/// The SCoP name is "<entry>---<exit>" built from the region boundaries.
4308 std::string Scop::getNameStr() const {
4309 std::string ExitName, EntryName;
4310 std::tie(EntryName, ExitName) = getEntryExitStr();
4311 return EntryName + "---" + ExitName;
/// Return printable names of the region's entry and exit blocks; a missing
/// exit (top-level region) is rendered as "FunctionExit".
4314 std::pair<std::string, std::string> Scop::getEntryExitStr() const {
4315 std::string ExitName, EntryName;
4316 raw_string_ostream ExitStr(ExitName);
4317 raw_string_ostream EntryStr(EntryName);
4319 R.getEntry()->printAsOperand(EntryStr, false);
4320 EntryStr.str();
4322 if (R.getExit()) {
4323 R.getExit()->printAsOperand(ExitStr, false);
4324 ExitStr.str();
4325 } else
4326 ExitName = "FunctionExit";
4328 return std::make_pair(EntryName, ExitName);
// Accessors returning (a copy of) the parameter context and its space.
4331 isl::set Scop::getContext() const { return isl::manage(isl_set_copy(Context)); }
4332 isl::space Scop::getParamSpace() const { return getContext().get_space(); }
/// Build a parameter space containing all SCEV parameters followed by the
/// Fortran array ids of this SCoP's arrays.
4334 isl::space Scop::getFullParamSpace() const {
4335 std::vector<isl::id> FortranIDs;
4336 FortranIDs = getFortranArrayIds(arrays());
4338 isl::space Space = isl::space::params_alloc(
4339 getIslCtx(), ParameterIds.size() + FortranIDs.size());
4341 unsigned PDim = 0;
4342 for (const SCEV *Parameter : Parameters) {
4343 isl::id Id = getIdForParam(Parameter);
4344 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4347 for (isl::id Id : FortranIDs)
4348 Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
4350 return Space;
/// Return a copy of the assumed context; only valid after it was built.
4353 isl::set Scop::getAssumedContext() const {
4354 assert(AssumedContext && "Assumed context not yet built");
4355 return isl::manage(isl_set_copy(AssumedContext));
/// Heuristically decide whether optimizing this SCoP is worthwhile: count
/// the loop iterators of statements considered optimizable and require more
/// than one. Always true under -polly-process-unprofitable.
4358 bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
4359 if (PollyProcessUnprofitable)
4360 return true;
4362 if (isEmpty())
4363 return false;
4365 unsigned OptimizableStmtsOrLoops = 0;
4366 for (auto &Stmt : *this) {
// Statements outside any loop cannot contribute loop optimizations.
4367 if (Stmt.getNumIterators() == 0)
4368 continue;
4370 bool ContainsArrayAccs = false;
4371 bool ContainsScalarAccs = false;
4372 for (auto *MA : Stmt) {
4373 if (MA->isRead())
4374 continue;
4375 ContainsArrayAccs |= MA->isLatestArrayKind();
4376 ContainsScalarAccs |= MA->isLatestScalarKind();
// When scalars count as unprofitable, only statements writing arrays but
// no scalars are counted.
4379 if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
4380 OptimizableStmtsOrLoops += Stmt.getNumIterators();
4383 return OptimizableStmtsOrLoops > 1;
/// Check whether there is any parameter valuation for which the SCoP's
/// optimized version may legally execute: the assumed context must be
/// non-empty and neither it, the domain context, nor the context may be
/// fully contained in the invalid context.
4386 bool Scop::hasFeasibleRuntimeContext() const {
4387 auto *PositiveContext = getAssumedContext().release();
4388 auto *NegativeContext = getInvalidContext().release();
4389 PositiveContext =
4390 addNonEmptyDomainConstraints(isl::manage(PositiveContext)).release();
4391 bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
4392 isl_set_is_subset(PositiveContext, NegativeContext));
4393 isl_set_free(PositiveContext);
4394 if (!IsFeasible) {
4395 isl_set_free(NegativeContext);
4396 return false;
4399 auto *DomainContext = isl_union_set_params(getDomains().release());
4400 IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
4401 IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
4402 isl_set_free(NegativeContext);
4403 isl_set_free(DomainContext);
4405 return IsFeasible;
/// Human-readable name of an assumption kind, used in remark messages.
4408 static std::string toString(AssumptionKind Kind) {
4409 switch (Kind) {
4410 case ALIASING:
4411 return "No-aliasing";
4412 case INBOUNDS:
4413 return "Inbounds";
4414 case WRAPPING:
4415 return "No-overflows";
4416 case UNSIGNED:
4417 return "Signed-unsigned";
4418 case COMPLEXITY:
4419 return "Low complexity";
4420 case PROFITABLE:
4421 return "Profitable";
4422 case ERRORBLOCK:
4423 return "No-error";
4424 case INFINITELOOP:
4425 return "Finite loop";
4426 case INVARIANTLOAD:
4427 return "Invariant load";
4428 case DELINEARIZATION:
4429 return "Delinearization";
4431 llvm_unreachable("Unknown AssumptionKind!");
/// Check whether @p Set actually changes anything: an assumption is
/// ineffective if it is already implied by the (assumed) context, a
/// restriction if it is disjoint from the context or already contained in
/// the invalid context.
4434 bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
4435 if (Sign == AS_ASSUMPTION) {
4436 if (isl_set_is_subset(Context, Set))
4437 return false;
4439 if (isl_set_is_subset(AssumedContext, Set))
4440 return false;
4441 } else {
4442 if (isl_set_is_disjoint(Set, Context))
4443 return false;
4445 if (isl_set_is_subset(Set, InvalidContext))
4446 return false;
4448 return true;
/// Account for the assumption/restriction @p Set: bump the matching
/// statistic and emit an optimization remark. Returns false if the
/// assumption should be dropped (trivial, or ineffective under
/// -polly-remarks-minimal).
4451 bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
4452 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4453 if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
4454 return false;
4456 // Do never emit trivial assumptions as they only clutter the output.
4457 if (!PollyRemarksMinimal) {
4458 isl_set *Univ = nullptr;
4459 if (Sign == AS_ASSUMPTION)
4460 Univ = isl_set_universe(isl_set_get_space(Set));
4462 bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
4463 (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
4464 isl_set_free(Univ);
4466 if (IsTrivial)
4467 return false;
// Update the per-kind assumption statistics.
4470 switch (Kind) {
4471 case ALIASING:
4472 AssumptionsAliasing++;
4473 break;
4474 case INBOUNDS:
4475 AssumptionsInbounds++;
4476 break;
4477 case WRAPPING:
4478 AssumptionsWrapping++;
4479 break;
4480 case UNSIGNED:
4481 AssumptionsUnsigned++;
4482 break;
4483 case COMPLEXITY:
4484 AssumptionsComplexity++;
4485 break;
4486 case PROFITABLE:
4487 AssumptionsUnprofitable++;
4488 break;
4489 case ERRORBLOCK:
4490 AssumptionsErrorBlock++;
4491 break;
4492 case INFINITELOOP:
4493 AssumptionsInfiniteLoop++;
4494 break;
4495 case INVARIANTLOAD:
4496 AssumptionsInvariantLoad++;
4497 break;
4498 case DELINEARIZATION:
4499 AssumptionsDelinearization++;
4500 break;
// Emit the remark at @p BB when known, otherwise at the region entry.
4503 auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4504 std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
4505 if (BB)
4506 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
4507 << Msg);
4508 else
4509 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
4510 R.getEntry())
4511 << Msg);
4512 return true;
4515 void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4516 DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4517 // Simplify the assumptions/restrictions first.
4518 Set = isl_set_gist_params(Set, getContext().release());
4520 if (!trackAssumption(Kind, Set, Loc, Sign, BB)) {
4521 isl_set_free(Set);
4522 return;
4525 if (Sign == AS_ASSUMPTION) {
4526 AssumedContext = isl_set_intersect(AssumedContext, Set);
4527 AssumedContext = isl_set_coalesce(AssumedContext);
4528 } else {
4529 InvalidContext = isl_set_union(InvalidContext, Set);
4530 InvalidContext = isl_set_coalesce(InvalidContext);
/// Queue an assumption to be added later by addRecordedAssumptions(), once
/// the domains of all statements are available. Takes ownership of @p Set.
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  assert((isl_set_is_params(Set) || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}

/// Flush all assumptions recorded by recordAssumption() into the contexts.
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    const Assumption &AS = RecordedAssumptions.pop_back_val();

    // Assumptions not tied to a basic block are parameter sets and can be
    // added directly.
    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB).release();
    if (!Dom) {
      isl_set_free(AS.Set);
      continue;
    }

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set;
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION, AS.BB);
  }
}

/// Mark the SCoP as not feasible by adding an empty assumed context.
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
  DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
  addAssumption(Kind, isl_set_empty(getParamSpace().release()), Loc,
                AS_ASSUMPTION, BB);
}

isl::set Scop::getInvalidContext() const {
  return isl::manage(isl_set_copy(InvalidContext));
}
4586 void Scop::printContext(raw_ostream &OS) const {
4587 OS << "Context:\n";
4588 OS.indent(4) << Context << "\n";
4590 OS.indent(4) << "Assumed Context:\n";
4591 OS.indent(4) << AssumedContext << "\n";
4593 OS.indent(4) << "Invalid Context:\n";
4594 OS.indent(4) << InvalidContext << "\n";
4596 unsigned Dim = 0;
4597 for (const SCEV *Parameter : Parameters)
4598 OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4601 void Scop::printAliasAssumptions(raw_ostream &OS) const {
4602 int noOfGroups = 0;
4603 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4604 if (Pair.second.size() == 0)
4605 noOfGroups += 1;
4606 else
4607 noOfGroups += Pair.second.size();
4610 OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4611 if (MinMaxAliasGroups.empty()) {
4612 OS.indent(8) << "n/a\n";
4613 return;
4616 for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4618 // If the group has no read only accesses print the write accesses.
4619 if (Pair.second.empty()) {
4620 OS.indent(8) << "[[";
4621 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4622 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4623 << ">";
4625 OS << " ]]\n";
4628 for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4629 OS.indent(8) << "[[";
4630 OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4631 for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4632 OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4633 << ">";
4635 OS << " ]]\n";
4640 void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
4641 OS << "Statements {\n";
4643 for (const ScopStmt &Stmt : *this) {
4644 OS.indent(4);
4645 Stmt.print(OS, PrintInstructions);
4648 OS.indent(4) << "}\n";
4651 void Scop::printArrayInfo(raw_ostream &OS) const {
4652 OS << "Arrays {\n";
4654 for (auto &Array : arrays())
4655 Array->print(OS);
4657 OS.indent(4) << "}\n";
4659 OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4661 for (auto &Array : arrays())
4662 Array->print(OS, /* SizeAsPwAff */ true);
4664 OS.indent(4) << "}\n";
4667 void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
4668 OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4669 OS.indent(4) << "Region: " << getNameStr() << "\n";
4670 OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
4671 OS.indent(4) << "Invariant Accesses: {\n";
4672 for (const auto &IAClass : InvariantEquivClasses) {
4673 const auto &MAs = IAClass.InvariantAccesses;
4674 if (MAs.empty()) {
4675 OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4676 } else {
4677 MAs.front()->print(OS);
4678 OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4679 << "\n";
4682 OS.indent(4) << "}\n";
4683 printContext(OS.indent(4));
4684 printArrayInfo(OS.indent(4));
4685 printAliasAssumptions(OS);
4686 printStatements(OS.indent(4), PrintInstructions);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this SCoP (with instructions) to the debug stream.
LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
#endif

isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }

__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
                                 bool NonNegative) {
  // First try to use the SCEVAffinator to generate a piecewise defined
  // affine function from @p E in the context of @p BB. If that tasks becomes to
  // complex the affinator might return a nullptr. In such a case we invalidate
  // the SCoP and return a dummy value. This way we do not need to add error
  // handling code to all users of this function.
  auto PWAC = Affinator.getPwAff(E, BB);
  if (PWAC.first) {
    // TODO: We could use a heuristic and either use:
    //         SCEVAffinator::takeNonNegativeAssumption
    //       or
    //         SCEVAffinator::interpretAsUnsigned
    //       to deal with unsigned or "NonNegative" SCEVs.
    if (NonNegative)
      Affinator.takeNonNegativeAssumption(PWAC);
    return PWAC;
  }

  // The affinator failed: invalidate the SCoP (COMPLEXITY) and return a
  // zero expression of the same type as a dummy value.
  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
  invalidate(COMPLEXITY, DL, BB);
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
4719 isl::union_set Scop::getDomains() const {
4720 isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
4721 isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4723 for (const ScopStmt &Stmt : *this)
4724 Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());
4726 return isl::manage(Domain);
4729 isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4730 PWACtx PWAC = getPwAff(E, BB);
4731 isl_set_free(PWAC.second);
4732 return isl::manage(PWAC.first);
4735 isl::union_map
4736 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4737 isl::union_map Accesses = isl::union_map::empty(getParamSpace());
4739 for (ScopStmt &Stmt : *this) {
4740 for (MemoryAccess *MA : Stmt) {
4741 if (!Predicate(*MA))
4742 continue;
4744 isl::set Domain = Stmt.getDomain();
4745 isl::map AccessDomain = MA->getAccessRelation();
4746 AccessDomain = AccessDomain.intersect_domain(Domain);
4747 Accesses = Accesses.add_map(AccessDomain);
4751 return Accesses.coalesce();
4754 isl::union_map Scop::getMustWrites() {
4755 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4758 isl::union_map Scop::getMayWrites() {
4759 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4762 isl::union_map Scop::getWrites() {
4763 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4766 isl::union_map Scop::getReads() {
4767 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4770 isl::union_map Scop::getAccesses() {
4771 return getAccessesOfType([](MemoryAccess &MA) { return true; });
4774 isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
4775 return getAccessesOfType(
4776 [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
// Check whether @p Node is an extension node.
//
// @return true if @p Node is an extension node.
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
    return isl_bool_error;
  else
    return isl_bool_true;
}

/// Return true iff @p Schedule contains an extension node. The traversal
/// aborts with isl_stat_error as soon as isNotExtNode sees one.
bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
  return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
                                                     nullptr) == isl_stat_error;
}

/// Return the schedule as a flat union map, or a null union_map if the
/// schedule tree contains extension nodes (which a flat map cannot express).
isl::union_map Scop::getSchedule() const {
  auto *Tree = getScheduleTree().release();
  if (containsExtensionNode(Tree)) {
    isl_schedule_free(Tree);
    return nullptr;
  }
  auto *S = isl_schedule_get_map(Tree);
  isl_schedule_free(Tree);
  return isl::manage(S);
}

/// Return a copy of the schedule tree with its domain restricted to the
/// current statement domains.
isl::schedule Scop::getScheduleTree() const {
  return isl::manage(isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
                                                   getDomains().release()));
}

/// Replace the schedule tree by one built from the flat map @p NewSchedule.
void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
  auto *S = isl_schedule_from_domain(getDomains().release());
  S = isl_schedule_insert_partial_schedule(
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
  isl_schedule_free(Schedule);
  Schedule = S;
}

/// Replace the schedule tree, taking ownership of @p NewSchedule.
void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
  isl_schedule_free(Schedule);
  Schedule = NewSchedule;
}
4823 bool Scop::restrictDomains(isl::union_set Domain) {
4824 bool Changed = false;
4825 for (ScopStmt &Stmt : *this) {
4826 isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
4827 isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);
4829 if (StmtDomain.is_subset(NewStmtDomain))
4830 continue;
4832 Changed = true;
4834 NewStmtDomain = NewStmtDomain.coalesce();
4836 if (NewStmtDomain.is_empty())
4837 Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
4838 else
4839 Stmt.restrictDomain(isl::set(NewStmtDomain));
4841 return Changed;
4844 ScalarEvolution *Scop::getSE() const { return SE; }
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  // For each set, project away all dimensions after N, then drop the
  // dimensions before N, leaving only the N-th dimension.
  auto Lambda = [&Result, N](isl::set S) -> isl::stat {
    int Dim = S.dim(isl::dim::set);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
    return isl::stat::ok;
  };

  isl::stat Res = USet.foreach_set(Lambda);
  (void)Res;

  assert(Res == isl::stat::ok);

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
4886 void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop,
4887 std::vector<Instruction *> Instructions, int Count) {
4888 assert(BB && "Unexpected nullptr!");
4889 Stmts.emplace_back(*this, *BB, SurroundingLoop, Instructions, Count);
4890 auto *Stmt = &Stmts.back();
4891 StmtMap[BB].push_back(Stmt);
4892 for (Instruction *Inst : Instructions) {
4893 assert(!InstStmtMap.count(Inst) &&
4894 "Unexpected statement corresponding to the instruction.");
4895 InstStmtMap[Inst] = Stmt;
4899 void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
4900 assert(R && "Unexpected nullptr!");
4901 Stmts.emplace_back(*this, *R, SurroundingLoop);
4902 auto *Stmt = &Stmts.back();
4903 for (BasicBlock *BB : R->blocks()) {
4904 StmtMap[BB].push_back(Stmt);
4905 for (Instruction &Inst : *BB) {
4906 assert(!InstStmtMap.count(&Inst) &&
4907 "Unexpected statement corresponding to the instruction.");
4908 InstStmtMap[&Inst] = Stmt;
4913 ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
4914 isl::set Domain) {
4915 #ifndef NDEBUG
4916 isl::set SourceDomain = SourceRel.domain();
4917 isl::set TargetDomain = TargetRel.domain();
4918 assert(Domain.is_subset(TargetDomain) &&
4919 "Target access not defined for complete statement domain");
4920 assert(Domain.is_subset(SourceDomain) &&
4921 "Source access not defined for complete statement domain");
4922 #endif
4923 Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4924 CopyStmtsNum++;
4925 return &(Stmts.back());
/// Build the schedule tree of the whole SCoP by traversing its region.
void Scop::buildSchedule(LoopInfo &LI) {
  Loop *L = getLoopSurroundingScop(*this, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(getRegion().getNode(), LoopStack, LI);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  Schedule = LoopStack[0].Schedule;
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///         /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing about any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queue and only traverse after the all nodes
/// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    // A node outside the SCoP's loops is scheduled in the surrounding loop.
    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        // The node belongs to a different loop nest; delay it until the
        // current loop is fully processed.
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}
5004 void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
5005 if (RN->isSubRegion()) {
5006 auto *LocalRegion = RN->getNodeAs<Region>();
5007 if (!isNonAffineSubRegion(LocalRegion)) {
5008 buildSchedule(LocalRegion, LoopStack, LI);
5009 return;
5013 auto &LoopData = LoopStack.back();
5014 LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);
5016 for (auto *Stmt : getStmtListFor(RN)) {
5017 auto *UDomain = isl_union_set_from_set(Stmt->getDomain().release());
5018 auto *StmtSchedule = isl_schedule_from_domain(UDomain);
5019 LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
5022 // Check if we just processed the last node in this loop. If we did, finalize
5023 // the loop by:
5025 // - adding new schedule dimensions
5026 // - folding the resulting schedule into the parent loop schedule
5027 // - dropping the loop schedule from the LoopStack.
5029 // Then continue to check surrounding loops, which might also have been
5030 // completed by this node.
5031 while (LoopData.L &&
5032 LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
5033 auto *Schedule = LoopData.Schedule;
5034 auto NumBlocksProcessed = LoopData.NumBlocksProcessed;
5036 LoopStack.pop_back();
5037 auto &NextLoopData = LoopStack.back();
5039 if (Schedule) {
5040 isl::union_set Domain = give(isl_schedule_get_domain(Schedule));
5041 isl::multi_union_pw_aff MUPA = mapToDimension(Domain, LoopStack.size());
5042 Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA.release());
5043 NextLoopData.Schedule =
5044 combineInSequence(NextLoopData.Schedule, Schedule);
5047 NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
5048 LoopData = NextLoopData;
5052 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
5053 auto StmtMapIt = StmtMap.find(BB);
5054 if (StmtMapIt == StmtMap.end())
5055 return {};
5056 return StmtMapIt->second;
5059 ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
5060 ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
5061 if (!StmtList.empty())
5062 return StmtList.back();
5063 return nullptr;
5066 ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
5067 if (RN->isSubRegion())
5068 return getStmtListFor(RN->getNodeAs<Region>());
5069 return getStmtListFor(RN->getNodeAs<BasicBlock>());
5072 ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
5073 return getStmtListFor(R->getEntry());
5076 int Scop::getRelativeLoopDepth(const Loop *L) const {
5077 if (!L || !R.contains(L))
5078 return -1;
5079 // outermostLoopInRegion always returns nullptr for top level regions
5080 if (R.isTopLevelRegion()) {
5081 // LoopInfo's depths start at 1, we start at 0
5082 return L->getLoopDepth() - 1;
5083 } else {
5084 Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
5085 assert(OuterLoop);
5086 return L->getLoopDepth() - OuterLoop->getLoopDepth();
5090 ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
5091 for (auto &SAI : arrays()) {
5092 if (SAI->getName() == BaseName)
5093 return SAI;
5095 return nullptr;
5098 void Scop::addAccessData(MemoryAccess *Access) {
5099 const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
5100 assert(SAI && "can only use after access relations have been constructed");
5102 if (Access->isOriginalValueKind() && Access->isRead())
5103 ValueUseAccs[SAI].push_back(Access);
5104 else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
5105 PHIIncomingAccs[SAI].push_back(Access);
5108 void Scop::removeAccessData(MemoryAccess *Access) {
5109 if (Access->isOriginalValueKind() && Access->isRead()) {
5110 auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
5111 std::remove(Uses.begin(), Uses.end(), Access);
5112 } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
5113 auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
5114 std::remove(Incomings.begin(), Incomings.end(), Access);
5118 MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
5119 assert(SAI->isValueKind());
5121 Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
5122 if (!Val)
5123 return nullptr;
5125 ScopStmt *Stmt = getStmtFor(Val);
5126 if (!Stmt)
5127 return nullptr;
5129 return Stmt->lookupValueWriteOf(Val);
5132 ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
5133 assert(SAI->isValueKind());
5134 auto It = ValueUseAccs.find(SAI);
5135 if (It == ValueUseAccs.end())
5136 return {};
5137 return It->second;
5140 MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
5141 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5143 if (SAI->isExitPHIKind())
5144 return nullptr;
5146 PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
5147 ScopStmt *Stmt = getStmtFor(PHI);
5148 assert(Stmt && "PHINode must be within the SCoP");
5150 return Stmt->lookupPHIReadOf(PHI);
5153 ArrayRef<MemoryAccess *> Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
5154 assert(SAI->isPHIKind() || SAI->isExitPHIKind());
5155 auto It = PHIIncomingAccs.find(SAI);
5156 if (It == PHIIncomingAccs.end())
5157 return {};
5158 return It->second;
5161 bool Scop::isEscaping(Instruction *Inst) {
5162 assert(contains(Inst) && "The concept of escaping makes only sense for "
5163 "values defined inside the SCoP");
5165 for (Use &Use : Inst->uses()) {
5166 BasicBlock *UserBB = getUseBlock(Use);
5167 if (!contains(UserBB))
5168 return true;
5170 // When the SCoP region exit needs to be simplified, PHIs in the region exit
5171 // move to a new basic block such that its incoming blocks are not in the
5172 // SCoP anymore.
5173 if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
5174 isExit(cast<PHINode>(Use.getUser())->getParent()))
5175 return true;
5177 return false;
/// Collect statistics about this SCoP (loop counts and categorized write
/// accesses). Only computed in builds with assertions or statistics enabled;
/// otherwise a default-initialized result is returned.
Scop::ScopStatistics Scop::getStatistics() const {
  ScopStatistics Result;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);

  int NumTotalLoops = LoopStat.NumLoops;
  Result.NumBoxedLoops = getBoxedLoops().size();
  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;

  for (const ScopStmt &Stmt : *this) {
    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
    bool IsInLoop = Stmt.getNumIterators() >= 1;
    for (MemoryAccess *MA : Stmt) {
      // Only write accesses are counted below.
      if (!MA->isWrite())
        continue;

      if (MA->isLatestValueKind()) {
        Result.NumValueWrites += 1;
        if (IsInLoop)
          Result.NumValueWritesInLoops += 1;
      }

      if (MA->isLatestAnyPHIKind()) {
        Result.NumPHIWrites += 1;
        if (IsInLoop)
          Result.NumPHIWritesInLoops += 1;
      }

      // A write is a singleton write if, within the statement domain, it
      // touches exactly one array element.
      isl::set AccSet =
          MA->getAccessRelation().intersect_domain(Domain).range();
      if (AccSet.is_singleton()) {
        Result.NumSingletonWrites += 1;
        if (IsInLoop)
          Result.NumSingletonWritesInLoops += 1;
      }
    }
  }
#endif
  return Result;
}
5221 raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
5222 scop.print(OS, PollyPrintInstructions);
5223 return OS;
5226 //===----------------------------------------------------------------------===//
5227 void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
5228 AU.addRequired<LoopInfoWrapperPass>();
5229 AU.addRequired<RegionInfoPass>();
5230 AU.addRequired<DominatorTreeWrapperPass>();
5231 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
5232 AU.addRequiredTransitive<ScopDetectionWrapperPass>();
5233 AU.addRequired<AAResultsWrapperPass>();
5234 AU.addRequired<AssumptionCacheTracker>();
5235 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5236 AU.setPreservesAll();
5239 void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
5240 Scop::ScopStatistics ScopStats) {
5241 assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
5243 NumScops++;
5244 NumLoopsInScop += Stats.NumLoops;
5245 MaxNumLoopsInScop =
5246 std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
5248 if (Stats.MaxDepth == 1)
5249 NumScopsDepthOne++;
5250 else if (Stats.MaxDepth == 2)
5251 NumScopsDepthTwo++;
5252 else if (Stats.MaxDepth == 3)
5253 NumScopsDepthThree++;
5254 else if (Stats.MaxDepth == 4)
5255 NumScopsDepthFour++;
5256 else if (Stats.MaxDepth == 5)
5257 NumScopsDepthFive++;
5258 else
5259 NumScopsDepthLarger++;
5261 NumAffineLoops += ScopStats.NumAffineLoops;
5262 NumBoxedLoops += ScopStats.NumBoxedLoops;
5264 NumValueWrites += ScopStats.NumValueWrites;
5265 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
5266 NumPHIWrites += ScopStats.NumPHIWrites;
5267 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
5268 NumSingletonWrites += ScopStats.NumSingletonWrites;
5269 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
/// Build the polyhedral description for the region @p R if ScopDetection
/// classified it as a maximal SCoP. Never modifies the IR.
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  S = SB.getScop(); // take ownership of scop object

#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  // Update the global statistics when a Scop was actually built.
  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
  }
#endif

  return false;
}
/// Print the Scop built for the pass's region, if any.
void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS, PollyPrintInstructions);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)
//===----------------------------------------------------------------------===//
/// Construct the ScopInfo result object and immediately build the Scops for
/// all valid regions of the function.
ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD, ScalarEvolution &SE,
                   LoopInfo &LI, AliasAnalysis &AA, DominatorTree &DT,
                   AssumptionCache &AC, OptimizationRemarkEmitter &ORE)
    : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
  recompute();
}

void ScopInfo::recompute() {
  RegionToScopMap.clear();
  /// Create polyhedral description of scops for all the valid regions of a
  /// function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
    std::unique_ptr<Scop> S = SB.getScop();
    // Scop construction may bail out (e.g. infeasible context); skip then.
    if (!S)
      continue;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
#endif
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}
5358 bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
5359 FunctionAnalysisManager::Invalidator &Inv) {
5360 // Check whether the analysis, all analyses on functions have been preserved
5361 // or anything we're holding references to is being invalidated
5362 auto PAC = PA.getChecker<ScopInfoAnalysis>();
5363 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
5364 Inv.invalidate<ScopAnalysis>(F, PA) ||
5365 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
5366 Inv.invalidate<LoopAnalysis>(F, PA) ||
5367 Inv.invalidate<AAManager>(F, PA) ||
5368 Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
5369 Inv.invalidate<AssumptionAnalysis>(F, PA);
AnalysisKey ScopInfoAnalysis::Key;

/// Gather all analyses ScopInfo depends on from the new pass manager and
/// construct the result (which builds all Scops of @p F).
ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  auto &DL = F.getParent()->getDataLayout();
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {DL, SD, SE, LI, AA, DT, AC, ORE};
}
5387 PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
5388 FunctionAnalysisManager &FAM) {
5389 auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
5390 // Since the legacy PM processes Scops in bottom up, we print them in reverse
5391 // order here to keep the output persistent
5392 for (auto &It : reverse(SI)) {
5393 if (It.second)
5394 It.second->print(Stream, PollyPrintInstructions);
5395 else
5396 Stream << "Invalid Scop!\n";
5398 return PreservedAnalyses::all();
void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // Pure analysis: the IR is never modified.
  AU.setPreservesAll();
}

/// Build the ScopInfo (Scops for all valid regions) for function @p F.
bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
  return false;
}
/// Print all Scops built for this function, one entry per detected region.
void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : *Result) {
    if (It.second)
      It.second->print(OS, PollyPrintInstructions);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)