Port SCEVAffinator to the isl c++ bindings
[polly-mirror.git] / lib / Support / SCEVAffinator.cpp
blob1a0e15c3d1785fb3ec6d25f193168b331fa240ce
1 //===--------- SCEVAffinator.cpp - Create Scops from LLVM IR -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Create a polyhedral description for a SCEV value.
12 //===----------------------------------------------------------------------===//
14 #include "polly/Support/SCEVAffinator.h"
15 #include "polly/Options.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/Support/GICHelper.h"
18 #include "polly/Support/SCEVValidator.h"
19 #include "polly/Support/ScopHelper.h"
20 #include "isl/aff.h"
21 #include "isl/local_space.h"
22 #include "isl/set.h"
23 #include "isl/val.h"
25 using namespace llvm;
26 using namespace polly;
28 static cl::opt<bool> IgnoreIntegerWrapping(
29 "polly-ignore-integer-wrapping",
30 cl::desc("Do not build run-time checks to proof absence of integer "
31 "wrapping"),
32 cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
34 // The maximal number of basic sets we allow during the construction of a
35 // piecewise affine function. More complex ones will result in very high
36 // compile time.
37 static int const MaxDisjunctionsInPwAff = 100;
39 // The maximal number of bits for which a general expression is modeled
40 // precisely.
41 static unsigned const MaxSmallBitWidth = 7;
43 /// Add the number of basic sets in @p Domain to @p User
44 static isl_stat addNumBasicSets(__isl_take isl_set *Domain,
45 __isl_take isl_aff *Aff, void *User) {
46 auto *NumBasicSets = static_cast<unsigned *>(User);
47 *NumBasicSets += isl_set_n_basic_set(Domain);
48 isl_set_free(Domain);
49 isl_aff_free(Aff);
50 return isl_stat_ok;
53 /// Determine if @p PWAC is too complex to continue.
54 static bool isTooComplex(PWACtx PWAC) {
55 unsigned NumBasicSets = 0;
56 isl_pw_aff_foreach_piece(PWAC.first.keep(), addNumBasicSets, &NumBasicSets);
57 if (NumBasicSets <= MaxDisjunctionsInPwAff)
58 return false;
59 return true;
62 /// Return the flag describing the possible wrapping of @p Expr.
63 static SCEV::NoWrapFlags getNoWrapFlags(const SCEV *Expr) {
64 if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr))
65 return NAry->getNoWrapFlags();
66 return SCEV::NoWrapMask;
69 static PWACtx combine(PWACtx PWAC0, PWACtx PWAC1,
70 __isl_give isl_pw_aff *(Fn)(__isl_take isl_pw_aff *,
71 __isl_take isl_pw_aff *)) {
72 PWAC0.first = isl::manage(Fn(PWAC0.first.take(), PWAC1.first.take()));
73 PWAC0.second = PWAC0.second.unite(PWAC1.second);
74 return PWAC0;
77 static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width,
78 __isl_take isl_set *Dom) {
79 auto *Ctx = isl_set_get_ctx(Dom);
80 auto *WidthVal = isl_val_int_from_ui(Ctx, Width);
81 auto *ExpVal = isl_val_2exp(WidthVal);
82 return isl_pw_aff_val_on_domain(Dom, ExpVal);
85 SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI)
86 : S(S), Ctx(S->getIslCtx().get()), SE(*S->getSE()), LI(LI),
87 TD(S->getFunction().getParent()->getDataLayout()) {}
89 Loop *SCEVAffinator::getScope() { return BB ? LI.getLoopFor(BB) : nullptr; }
91 void SCEVAffinator::interpretAsUnsigned(PWACtx &PWAC, unsigned Width) {
92 auto *NonNegDom = isl_pw_aff_nonneg_set(PWAC.first.copy());
93 auto *NonNegPWA =
94 isl_pw_aff_intersect_domain(PWAC.first.copy(), isl_set_copy(NonNegDom));
95 auto *ExpPWA = getWidthExpValOnDomain(Width, isl_set_complement(NonNegDom));
96 PWAC.first = isl::manage(isl_pw_aff_union_add(
97 NonNegPWA, isl_pw_aff_add(PWAC.first.take(), ExpPWA)));
100 void SCEVAffinator::takeNonNegativeAssumption(PWACtx &PWAC) {
101 auto *NegPWA = isl_pw_aff_neg(PWAC.first.copy());
102 auto *NegDom = isl_pw_aff_pos_set(NegPWA);
103 PWAC.second =
104 isl::manage(isl_set_union(PWAC.second.take(), isl_set_copy(NegDom)));
105 auto *Restriction = BB ? NegDom : isl_set_params(NegDom);
106 auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
107 S->recordAssumption(UNSIGNED, isl::manage(Restriction), DL, AS_RESTRICTION,
108 BB);
111 PWACtx SCEVAffinator::getPWACtxFromPWA(isl::pw_aff PWA) {
112 return std::make_pair(PWA, isl::set::empty(isl::space(Ctx, 0, NumIterators)));
115 PWACtx SCEVAffinator::getPwAff(const SCEV *Expr, BasicBlock *BB) {
116 this->BB = BB;
118 if (BB) {
119 auto *DC = S->getDomainConditions(BB).release();
120 NumIterators = isl_set_n_dim(DC);
121 isl_set_free(DC);
122 } else
123 NumIterators = 0;
125 return visit(Expr);
128 PWACtx SCEVAffinator::checkForWrapping(const SCEV *Expr, PWACtx PWAC) const {
129 // If the SCEV flags do contain NSW (no signed wrap) then PWA already
130 // represents Expr in modulo semantic (it is not allowed to overflow), thus we
131 // are done. Otherwise, we will compute:
132 // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1)
133 // whereas n is the number of bits of the Expr, hence:
134 // n = bitwidth(ExprType)
136 if (IgnoreIntegerWrapping || (getNoWrapFlags(Expr) & SCEV::FlagNSW))
137 return PWAC;
139 isl::pw_aff PWAMod = addModuloSemantic(PWAC.first, Expr->getType());
141 isl::set NotEqualSet = PWAC.first.ne_set(PWAMod);
142 PWAC.second = PWAC.second.unite(NotEqualSet).coalesce();
144 const DebugLoc &Loc = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
145 if (!BB)
146 NotEqualSet = NotEqualSet.params();
147 NotEqualSet = NotEqualSet.coalesce();
149 if (!NotEqualSet.is_empty())
150 S->recordAssumption(WRAPPING, NotEqualSet, Loc, AS_RESTRICTION, BB);
152 return PWAC;
155 isl::pw_aff SCEVAffinator::addModuloSemantic(isl::pw_aff PWA,
156 Type *ExprType) const {
157 unsigned Width = TD.getTypeSizeInBits(ExprType);
159 auto ModVal = isl::val::int_from_ui(Ctx, Width);
160 ModVal = ModVal.two_exp();
162 isl::set Domain = PWA.domain();
163 isl::pw_aff AddPW =
164 isl::manage(getWidthExpValOnDomain(Width - 1, Domain.take()));
165 return PWA.add(AddPW).mod(ModVal).sub(AddPW);
168 bool SCEVAffinator::hasNSWAddRecForLoop(Loop *L) const {
169 for (const auto &CachedPair : CachedExpressions) {
170 auto *AddRec = dyn_cast<SCEVAddRecExpr>(CachedPair.first.first);
171 if (!AddRec)
172 continue;
173 if (AddRec->getLoop() != L)
174 continue;
175 if (AddRec->getNoWrapFlags() & SCEV::FlagNSW)
176 return true;
179 return false;
182 bool SCEVAffinator::computeModuloForExpr(const SCEV *Expr) {
183 unsigned Width = TD.getTypeSizeInBits(Expr->getType());
184 // We assume nsw expressions never overflow.
185 if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr))
186 if (NAry->getNoWrapFlags() & SCEV::FlagNSW)
187 return false;
188 return Width <= MaxSmallBitWidth;
191 PWACtx SCEVAffinator::visit(const SCEV *Expr) {
193 auto Key = std::make_pair(Expr, BB);
194 PWACtx PWAC = CachedExpressions[Key];
195 if (PWAC.first)
196 return PWAC;
198 auto ConstantAndLeftOverPair = extractConstantFactor(Expr, SE);
199 auto *Factor = ConstantAndLeftOverPair.first;
200 Expr = ConstantAndLeftOverPair.second;
202 auto *Scope = getScope();
203 S->addParams(getParamsInAffineExpr(&S->getRegion(), Scope, Expr, SE));
205 // In case the scev is a valid parameter, we do not further analyze this
206 // expression, but create a new parameter in the isl_pw_aff. This allows us
207 // to treat subexpressions that we cannot translate into an piecewise affine
208 // expression, as constant parameters of the piecewise affine expression.
209 if (isl_id *Id = S->getIdForParam(Expr).release()) {
210 isl_space *Space = isl_space_set_alloc(Ctx.get(), 1, NumIterators);
211 Space = isl_space_set_dim_id(Space, isl_dim_param, 0, Id);
213 isl_set *Domain = isl_set_universe(isl_space_copy(Space));
214 isl_aff *Affine = isl_aff_zero_on_domain(isl_local_space_from_space(Space));
215 Affine = isl_aff_add_coefficient_si(Affine, isl_dim_param, 0, 1);
217 PWAC = getPWACtxFromPWA(isl::manage(isl_pw_aff_alloc(Domain, Affine)));
218 } else {
219 PWAC = SCEVVisitor<SCEVAffinator, PWACtx>::visit(Expr);
220 if (computeModuloForExpr(Expr))
221 PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
222 else
223 PWAC = checkForWrapping(Expr, PWAC);
226 if (!Factor->getType()->isIntegerTy(1)) {
227 PWAC = combine(PWAC, visitConstant(Factor), isl_pw_aff_mul);
228 if (computeModuloForExpr(Key.first))
229 PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
232 // For compile time reasons we need to simplify the PWAC before we cache and
233 // return it.
234 PWAC.first = PWAC.first.coalesce();
235 if (!computeModuloForExpr(Key.first))
236 PWAC = checkForWrapping(Key.first, PWAC);
238 CachedExpressions[Key] = PWAC;
239 return PWAC;
242 PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) {
243 ConstantInt *Value = Expr->getValue();
244 isl_val *v;
246 // LLVM does not define if an integer value is interpreted as a signed or
247 // unsigned value. Hence, without further information, it is unknown how
248 // this value needs to be converted to GMP. At the moment, we only support
249 // signed operations. So we just interpret it as signed. Later, there are
250 // two options:
252 // 1. We always interpret any value as signed and convert the values on
253 // demand.
254 // 2. We pass down the signedness of the calculation and use it to interpret
255 // this constant correctly.
256 v = isl_valFromAPInt(Ctx.get(), Value->getValue(), /* isSigned */ true);
258 isl_space *Space = isl_space_set_alloc(Ctx.get(), 0, NumIterators);
259 isl_local_space *ls = isl_local_space_from_space(Space);
260 return getPWACtxFromPWA(
261 isl::manage(isl_pw_aff_from_aff(isl_aff_val_on_domain(ls, v))));
264 PWACtx
265 SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) {
266 // Truncate operations are basically modulo operations, thus we can
267 // model them that way. However, for large types we assume the operand
268 // to fit in the new type size instead of introducing a modulo with a very
269 // large constant.
271 auto *Op = Expr->getOperand();
272 auto OpPWAC = visit(Op);
274 unsigned Width = TD.getTypeSizeInBits(Expr->getType());
276 if (computeModuloForExpr(Expr))
277 return OpPWAC;
279 auto *Dom = OpPWAC.first.domain().take();
280 auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
281 auto *GreaterDom =
282 isl_pw_aff_ge_set(OpPWAC.first.copy(), isl_pw_aff_copy(ExpPWA));
283 auto *SmallerDom =
284 isl_pw_aff_lt_set(OpPWAC.first.copy(), isl_pw_aff_neg(ExpPWA));
285 auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom);
286 OpPWAC.second =
287 OpPWAC.second.unite(isl::manage(isl_set_copy(OutOfBoundsDom)));
289 if (!BB) {
290 assert(isl_set_dim(OutOfBoundsDom, isl_dim_set) == 0 &&
291 "Expected a zero dimensional set for non-basic-block domains");
292 OutOfBoundsDom = isl_set_params(OutOfBoundsDom);
295 S->recordAssumption(UNSIGNED, isl::manage(OutOfBoundsDom), DebugLoc(),
296 AS_RESTRICTION, BB);
298 return OpPWAC;
301 PWACtx
302 SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
303 // A zero-extended value can be interpreted as a piecewise defined signed
304 // value. If the value was non-negative it stays the same, otherwise it
305 // is the sum of the original value and 2^n where n is the bit-width of
306 // the original (or operand) type. Examples:
307 // zext i8 127 to i32 -> { [127] }
308 // zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] }
309 // zext i8 %v to i32 -> [v] -> { [v] | v >= 0; [256 + v] | v < 0 }
311 // However, LLVM/Scalar Evolution uses zero-extend (potentially lead by a
312 // truncate) to represent some forms of modulo computation. The left-hand side
313 // of the condition in the code below would result in the SCEV
314 // "zext i1 <false, +, true>for.body" which is just another description
315 // of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0".
317 // for (i = 0; i < N; i++)
318 // if (i & 1 != 0 /* == i % 2 */)
319 // /* do something */
321 // If we do not make the modulo explicit but only use the mechanism described
322 // above we will get the very restrictive assumption "N < 3", because for all
323 // values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap.
324 // Alternatively, we can make the modulo in the operand explicit in the
325 // resulting piecewise function and thereby avoid the assumption on N. For the
326 // example this would result in the following piecewise affine function:
327 // { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0;
328 // [i0] -> [(0)] : 2*floor((i0)/2) = i0 }
329 // To this end we can first determine if the (immediate) operand of the
330 // zero-extend can wrap and, in case it might, we will use explicit modulo
331 // semantic to compute the result instead of emitting non-wrapping
332 // assumptions.
334 // Note that operands with large bit-widths are less likely to be negative
335 // because it would result in a very large access offset or loop bound after
336 // the zero-extend. To this end one can optimistically assume the operand to
337 // be positive and avoid the piecewise definition if the bit-width is bigger
338 // than some threshold (here MaxZextSmallBitWidth).
340 // We choose to go with a hybrid solution of all modeling techniques described
341 // above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the
342 // wrapping explicitly and use a piecewise defined function. However, if the
343 // bit-width is bigger than MaxZextSmallBitWidth we will employ overflow
344 // assumptions and assume the "former negative" piece will not exist.
346 auto *Op = Expr->getOperand();
347 auto OpPWAC = visit(Op);
349 // If the width is to big we assume the negative part does not occur.
350 if (!computeModuloForExpr(Op)) {
351 takeNonNegativeAssumption(OpPWAC);
352 return OpPWAC;
355 // If the width is small build the piece for the non-negative part and
356 // the one for the negative part and unify them.
357 unsigned Width = TD.getTypeSizeInBits(Op->getType());
358 interpretAsUnsigned(OpPWAC, Width);
359 return OpPWAC;
362 PWACtx SCEVAffinator::visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
363 // As all values are represented as signed, a sign extension is a noop.
364 return visit(Expr->getOperand());
367 PWACtx SCEVAffinator::visitAddExpr(const SCEVAddExpr *Expr) {
368 PWACtx Sum = visit(Expr->getOperand(0));
370 for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
371 Sum = combine(Sum, visit(Expr->getOperand(i)), isl_pw_aff_add);
372 if (isTooComplex(Sum))
373 return std::make_pair(nullptr, nullptr);
376 return Sum;
379 PWACtx SCEVAffinator::visitMulExpr(const SCEVMulExpr *Expr) {
380 PWACtx Prod = visit(Expr->getOperand(0));
382 for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
383 Prod = combine(Prod, visit(Expr->getOperand(i)), isl_pw_aff_mul);
384 if (isTooComplex(Prod))
385 return std::make_pair(nullptr, nullptr);
388 return Prod;
391 PWACtx SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
392 assert(Expr->isAffine() && "Only affine AddRecurrences allowed");
394 auto Flags = Expr->getNoWrapFlags();
396 // Directly generate isl_pw_aff for Expr if 'start' is zero.
397 if (Expr->getStart()->isZero()) {
398 assert(S->contains(Expr->getLoop()) &&
399 "Scop does not contain the loop referenced in this AddRec");
401 PWACtx Step = visit(Expr->getOperand(1));
402 isl_space *Space = isl_space_set_alloc(Ctx.get(), 0, NumIterators);
403 isl_local_space *LocalSpace = isl_local_space_from_space(Space);
405 unsigned loopDimension = S->getRelativeLoopDepth(Expr->getLoop());
407 isl_aff *LAff = isl_aff_set_coefficient_si(
408 isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
409 isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
411 Step.first = Step.first.mul(isl::manage(LPwAff));
412 return Step;
415 // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
416 // if 'start' is not zero.
417 // TODO: Using the original SCEV no-wrap flags is not always safe, however
418 // as our code generation is reordering the expression anyway it doesn't
419 // really matter.
420 const SCEV *ZeroStartExpr =
421 SE.getAddRecExpr(SE.getConstant(Expr->getStart()->getType(), 0),
422 Expr->getStepRecurrence(SE), Expr->getLoop(), Flags);
424 PWACtx Result = visit(ZeroStartExpr);
425 PWACtx Start = visit(Expr->getStart());
426 Result = combine(Result, Start, isl_pw_aff_add);
427 return Result;
430 PWACtx SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) {
431 PWACtx Max = visit(Expr->getOperand(0));
433 for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
434 Max = combine(Max, visit(Expr->getOperand(i)), isl_pw_aff_max);
435 if (isTooComplex(Max))
436 return std::make_pair(nullptr, nullptr);
439 return Max;
442 PWACtx SCEVAffinator::visitUMaxExpr(const SCEVUMaxExpr *Expr) {
443 llvm_unreachable("SCEVUMaxExpr not yet supported");
446 PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) {
447 // The handling of unsigned division is basically the same as for signed
448 // division, except the interpretation of the operands. As the divisor
449 // has to be constant in both cases we can simply interpret it as an
450 // unsigned value without additional complexity in the representation.
451 // For the dividend we could choose from the different representation
452 // schemes introduced for zero-extend operations but for now we will
453 // simply use an assumption.
454 auto *Dividend = Expr->getLHS();
455 auto *Divisor = Expr->getRHS();
456 assert(isa<SCEVConstant>(Divisor) &&
457 "UDiv is no parameter but has a non-constant RHS.");
459 auto DividendPWAC = visit(Dividend);
460 auto DivisorPWAC = visit(Divisor);
462 if (SE.isKnownNegative(Divisor)) {
463 // Interpret negative divisors unsigned. This is a special case of the
464 // piece-wise defined value described for zero-extends as we already know
465 // the actual value of the constant divisor.
466 unsigned Width = TD.getTypeSizeInBits(Expr->getType());
467 auto *DivisorDom = DivisorPWAC.first.domain().take();
468 auto *WidthExpPWA = getWidthExpValOnDomain(Width, DivisorDom);
469 DivisorPWAC.first = DivisorPWAC.first.add(isl::manage(WidthExpPWA));
472 // TODO: One can represent the dividend as piece-wise function to be more
473 // precise but therefor a heuristic is needed.
475 // Assume a non-negative dividend.
476 takeNonNegativeAssumption(DividendPWAC);
478 DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_div);
479 DividendPWAC.first = DividendPWAC.first.floor();
481 return DividendPWAC;
484 PWACtx SCEVAffinator::visitSDivInstruction(Instruction *SDiv) {
485 assert(SDiv->getOpcode() == Instruction::SDiv && "Assumed SDiv instruction!");
487 auto *Scope = getScope();
488 auto *Divisor = SDiv->getOperand(1);
489 auto *DivisorSCEV = SE.getSCEVAtScope(Divisor, Scope);
490 auto DivisorPWAC = visit(DivisorSCEV);
491 assert(isa<SCEVConstant>(DivisorSCEV) &&
492 "SDiv is no parameter but has a non-constant RHS.");
494 auto *Dividend = SDiv->getOperand(0);
495 auto *DividendSCEV = SE.getSCEVAtScope(Dividend, Scope);
496 auto DividendPWAC = visit(DividendSCEV);
497 DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_tdiv_q);
498 return DividendPWAC;
501 PWACtx SCEVAffinator::visitSRemInstruction(Instruction *SRem) {
502 assert(SRem->getOpcode() == Instruction::SRem && "Assumed SRem instruction!");
504 auto *Scope = getScope();
505 auto *Divisor = SRem->getOperand(1);
506 auto *DivisorSCEV = SE.getSCEVAtScope(Divisor, Scope);
507 auto DivisorPWAC = visit(DivisorSCEV);
508 assert(isa<ConstantInt>(Divisor) &&
509 "SRem is no parameter but has a non-constant RHS.");
511 auto *Dividend = SRem->getOperand(0);
512 auto *DividendSCEV = SE.getSCEVAtScope(Dividend, Scope);
513 auto DividendPWAC = visit(DividendSCEV);
514 DividendPWAC = combine(DividendPWAC, DivisorPWAC, isl_pw_aff_tdiv_r);
515 return DividendPWAC;
518 PWACtx SCEVAffinator::visitUnknown(const SCEVUnknown *Expr) {
519 if (Instruction *I = dyn_cast<Instruction>(Expr->getValue())) {
520 switch (I->getOpcode()) {
521 case Instruction::IntToPtr:
522 return visit(SE.getSCEVAtScope(I->getOperand(0), getScope()));
523 case Instruction::PtrToInt:
524 return visit(SE.getSCEVAtScope(I->getOperand(0), getScope()));
525 case Instruction::SDiv:
526 return visitSDivInstruction(I);
527 case Instruction::SRem:
528 return visitSRemInstruction(I);
529 default:
530 break; // Fall through.
534 llvm_unreachable(
535 "Unknowns SCEV was neither parameter nor a valid instruction.");