1 //===--------- SCEVAffinator.cpp - Create Scops from LLVM IR -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Create a polyhedral description for a SCEV value.
12 //===----------------------------------------------------------------------===//
14 #include "polly/Support/SCEVAffinator.h"
15 #include "polly/Options.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/Support/GICHelper.h"
18 #include "polly/Support/SCEVValidator.h"
19 #include "polly/Support/ScopHelper.h"
21 #include "isl/local_space.h"
26 using namespace polly
;
28 static cl::opt
<bool> IgnoreIntegerWrapping(
29 "polly-ignore-integer-wrapping",
30 cl::desc("Do not build run-time checks to proof absence of integer "
32 cl::Hidden
, cl::ZeroOrMore
, cl::init(false), cl::cat(PollyCategory
));
34 // The maximal number of basic sets we allow during the construction of a
35 // piecewise affine function. More complex ones will result in very high
37 static int const MaxDisjunctionsInPwAff
= 100;
39 // The maximal number of bits for which a general expression is modeled
41 static unsigned const MaxSmallBitWidth
= 7;
43 /// Add the number of basic sets in @p Domain to @p User
44 static isl_stat
addNumBasicSets(__isl_take isl_set
*Domain
,
45 __isl_take isl_aff
*Aff
, void *User
) {
46 auto *NumBasicSets
= static_cast<unsigned *>(User
);
47 *NumBasicSets
+= isl_set_n_basic_set(Domain
);
53 /// Determine if @p PWAC is too complex to continue.
54 static bool isTooComplex(PWACtx PWAC
) {
55 unsigned NumBasicSets
= 0;
56 isl_pw_aff_foreach_piece(PWAC
.first
.keep(), addNumBasicSets
, &NumBasicSets
);
57 if (NumBasicSets
<= MaxDisjunctionsInPwAff
)
62 /// Return the flag describing the possible wrapping of @p Expr.
63 static SCEV::NoWrapFlags
getNoWrapFlags(const SCEV
*Expr
) {
64 if (auto *NAry
= dyn_cast
<SCEVNAryExpr
>(Expr
))
65 return NAry
->getNoWrapFlags();
66 return SCEV::NoWrapMask
;
69 static PWACtx
combine(PWACtx PWAC0
, PWACtx PWAC1
,
70 __isl_give isl_pw_aff
*(Fn
)(__isl_take isl_pw_aff
*,
71 __isl_take isl_pw_aff
*)) {
72 PWAC0
.first
= isl::manage(Fn(PWAC0
.first
.take(), PWAC1
.first
.take()));
73 PWAC0
.second
= PWAC0
.second
.unite(PWAC1
.second
);
77 static __isl_give isl_pw_aff
*getWidthExpValOnDomain(unsigned Width
,
78 __isl_take isl_set
*Dom
) {
79 auto *Ctx
= isl_set_get_ctx(Dom
);
80 auto *WidthVal
= isl_val_int_from_ui(Ctx
, Width
);
81 auto *ExpVal
= isl_val_2exp(WidthVal
);
82 return isl_pw_aff_val_on_domain(Dom
, ExpVal
);
85 SCEVAffinator::SCEVAffinator(Scop
*S
, LoopInfo
&LI
)
86 : S(S
), Ctx(S
->getIslCtx().get()), SE(*S
->getSE()), LI(LI
),
87 TD(S
->getFunction().getParent()->getDataLayout()) {}
89 Loop
*SCEVAffinator::getScope() { return BB
? LI
.getLoopFor(BB
) : nullptr; }
91 void SCEVAffinator::interpretAsUnsigned(PWACtx
&PWAC
, unsigned Width
) {
92 auto *NonNegDom
= isl_pw_aff_nonneg_set(PWAC
.first
.copy());
94 isl_pw_aff_intersect_domain(PWAC
.first
.copy(), isl_set_copy(NonNegDom
));
95 auto *ExpPWA
= getWidthExpValOnDomain(Width
, isl_set_complement(NonNegDom
));
96 PWAC
.first
= isl::manage(isl_pw_aff_union_add(
97 NonNegPWA
, isl_pw_aff_add(PWAC
.first
.take(), ExpPWA
)));
100 void SCEVAffinator::takeNonNegativeAssumption(PWACtx
&PWAC
) {
101 auto *NegPWA
= isl_pw_aff_neg(PWAC
.first
.copy());
102 auto *NegDom
= isl_pw_aff_pos_set(NegPWA
);
104 isl::manage(isl_set_union(PWAC
.second
.take(), isl_set_copy(NegDom
)));
105 auto *Restriction
= BB
? NegDom
: isl_set_params(NegDom
);
106 auto DL
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
107 S
->recordAssumption(UNSIGNED
, isl::manage(Restriction
), DL
, AS_RESTRICTION
,
111 PWACtx
SCEVAffinator::getPWACtxFromPWA(isl::pw_aff PWA
) {
112 return std::make_pair(PWA
, isl::set::empty(isl::space(Ctx
, 0, NumIterators
)));
115 PWACtx
SCEVAffinator::getPwAff(const SCEV
*Expr
, BasicBlock
*BB
) {
119 auto *DC
= S
->getDomainConditions(BB
).release();
120 NumIterators
= isl_set_n_dim(DC
);
128 PWACtx
SCEVAffinator::checkForWrapping(const SCEV
*Expr
, PWACtx PWAC
) const {
129 // If the SCEV flags do contain NSW (no signed wrap) then PWA already
130 // represents Expr in modulo semantic (it is not allowed to overflow), thus we
131 // are done. Otherwise, we will compute:
132 // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1)
133 // whereas n is the number of bits of the Expr, hence:
134 // n = bitwidth(ExprType)
136 if (IgnoreIntegerWrapping
|| (getNoWrapFlags(Expr
) & SCEV::FlagNSW
))
139 isl::pw_aff PWAMod
= addModuloSemantic(PWAC
.first
, Expr
->getType());
141 isl::set NotEqualSet
= PWAC
.first
.ne_set(PWAMod
);
142 PWAC
.second
= PWAC
.second
.unite(NotEqualSet
).coalesce();
144 const DebugLoc
&Loc
= BB
? BB
->getTerminator()->getDebugLoc() : DebugLoc();
146 NotEqualSet
= NotEqualSet
.params();
147 NotEqualSet
= NotEqualSet
.coalesce();
149 if (!NotEqualSet
.is_empty())
150 S
->recordAssumption(WRAPPING
, NotEqualSet
, Loc
, AS_RESTRICTION
, BB
);
155 isl::pw_aff
SCEVAffinator::addModuloSemantic(isl::pw_aff PWA
,
156 Type
*ExprType
) const {
157 unsigned Width
= TD
.getTypeSizeInBits(ExprType
);
159 auto ModVal
= isl::val::int_from_ui(Ctx
, Width
);
160 ModVal
= ModVal
.two_exp();
162 isl::set Domain
= PWA
.domain();
164 isl::manage(getWidthExpValOnDomain(Width
- 1, Domain
.take()));
165 return PWA
.add(AddPW
).mod(ModVal
).sub(AddPW
);
168 bool SCEVAffinator::hasNSWAddRecForLoop(Loop
*L
) const {
169 for (const auto &CachedPair
: CachedExpressions
) {
170 auto *AddRec
= dyn_cast
<SCEVAddRecExpr
>(CachedPair
.first
.first
);
173 if (AddRec
->getLoop() != L
)
175 if (AddRec
->getNoWrapFlags() & SCEV::FlagNSW
)
182 bool SCEVAffinator::computeModuloForExpr(const SCEV
*Expr
) {
183 unsigned Width
= TD
.getTypeSizeInBits(Expr
->getType());
184 // We assume nsw expressions never overflow.
185 if (auto *NAry
= dyn_cast
<SCEVNAryExpr
>(Expr
))
186 if (NAry
->getNoWrapFlags() & SCEV::FlagNSW
)
188 return Width
<= MaxSmallBitWidth
;
191 PWACtx
SCEVAffinator::visit(const SCEV
*Expr
) {
193 auto Key
= std::make_pair(Expr
, BB
);
194 PWACtx PWAC
= CachedExpressions
[Key
];
198 auto ConstantAndLeftOverPair
= extractConstantFactor(Expr
, SE
);
199 auto *Factor
= ConstantAndLeftOverPair
.first
;
200 Expr
= ConstantAndLeftOverPair
.second
;
202 auto *Scope
= getScope();
203 S
->addParams(getParamsInAffineExpr(&S
->getRegion(), Scope
, Expr
, SE
));
205 // In case the scev is a valid parameter, we do not further analyze this
206 // expression, but create a new parameter in the isl_pw_aff. This allows us
207 // to treat subexpressions that we cannot translate into an piecewise affine
208 // expression, as constant parameters of the piecewise affine expression.
209 if (isl_id
*Id
= S
->getIdForParam(Expr
).release()) {
210 isl_space
*Space
= isl_space_set_alloc(Ctx
.get(), 1, NumIterators
);
211 Space
= isl_space_set_dim_id(Space
, isl_dim_param
, 0, Id
);
213 isl_set
*Domain
= isl_set_universe(isl_space_copy(Space
));
214 isl_aff
*Affine
= isl_aff_zero_on_domain(isl_local_space_from_space(Space
));
215 Affine
= isl_aff_add_coefficient_si(Affine
, isl_dim_param
, 0, 1);
217 PWAC
= getPWACtxFromPWA(isl::manage(isl_pw_aff_alloc(Domain
, Affine
)));
219 PWAC
= SCEVVisitor
<SCEVAffinator
, PWACtx
>::visit(Expr
);
220 if (computeModuloForExpr(Expr
))
221 PWAC
.first
= addModuloSemantic(PWAC
.first
, Expr
->getType());
223 PWAC
= checkForWrapping(Expr
, PWAC
);
226 if (!Factor
->getType()->isIntegerTy(1)) {
227 PWAC
= combine(PWAC
, visitConstant(Factor
), isl_pw_aff_mul
);
228 if (computeModuloForExpr(Key
.first
))
229 PWAC
.first
= addModuloSemantic(PWAC
.first
, Expr
->getType());
232 // For compile time reasons we need to simplify the PWAC before we cache and
234 PWAC
.first
= PWAC
.first
.coalesce();
235 if (!computeModuloForExpr(Key
.first
))
236 PWAC
= checkForWrapping(Key
.first
, PWAC
);
238 CachedExpressions
[Key
] = PWAC
;
242 PWACtx
SCEVAffinator::visitConstant(const SCEVConstant
*Expr
) {
243 ConstantInt
*Value
= Expr
->getValue();
246 // LLVM does not define if an integer value is interpreted as a signed or
247 // unsigned value. Hence, without further information, it is unknown how
248 // this value needs to be converted to GMP. At the moment, we only support
249 // signed operations. So we just interpret it as signed. Later, there are
252 // 1. We always interpret any value as signed and convert the values on
254 // 2. We pass down the signedness of the calculation and use it to interpret
255 // this constant correctly.
256 v
= isl_valFromAPInt(Ctx
.get(), Value
->getValue(), /* isSigned */ true);
258 isl_space
*Space
= isl_space_set_alloc(Ctx
.get(), 0, NumIterators
);
259 isl_local_space
*ls
= isl_local_space_from_space(Space
);
260 return getPWACtxFromPWA(
261 isl::manage(isl_pw_aff_from_aff(isl_aff_val_on_domain(ls
, v
))));
265 SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr
*Expr
) {
266 // Truncate operations are basically modulo operations, thus we can
267 // model them that way. However, for large types we assume the operand
268 // to fit in the new type size instead of introducing a modulo with a very
271 auto *Op
= Expr
->getOperand();
272 auto OpPWAC
= visit(Op
);
274 unsigned Width
= TD
.getTypeSizeInBits(Expr
->getType());
276 if (computeModuloForExpr(Expr
))
279 auto *Dom
= OpPWAC
.first
.domain().take();
280 auto *ExpPWA
= getWidthExpValOnDomain(Width
- 1, Dom
);
282 isl_pw_aff_ge_set(OpPWAC
.first
.copy(), isl_pw_aff_copy(ExpPWA
));
284 isl_pw_aff_lt_set(OpPWAC
.first
.copy(), isl_pw_aff_neg(ExpPWA
));
285 auto *OutOfBoundsDom
= isl_set_union(SmallerDom
, GreaterDom
);
287 OpPWAC
.second
.unite(isl::manage(isl_set_copy(OutOfBoundsDom
)));
290 assert(isl_set_dim(OutOfBoundsDom
, isl_dim_set
) == 0 &&
291 "Expected a zero dimensional set for non-basic-block domains");
292 OutOfBoundsDom
= isl_set_params(OutOfBoundsDom
);
295 S
->recordAssumption(UNSIGNED
, isl::manage(OutOfBoundsDom
), DebugLoc(),
302 SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr
*Expr
) {
303 // A zero-extended value can be interpreted as a piecewise defined signed
304 // value. If the value was non-negative it stays the same, otherwise it
305 // is the sum of the original value and 2^n where n is the bit-width of
306 // the original (or operand) type. Examples:
307 // zext i8 127 to i32 -> { [127] }
308 // zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] }
309 // zext i8 %v to i32 -> [v] -> { [v] | v >= 0; [256 + v] | v < 0 }
311 // However, LLVM/Scalar Evolution uses zero-extend (potentially lead by a
312 // truncate) to represent some forms of modulo computation. The left-hand side
313 // of the condition in the code below would result in the SCEV
314 // "zext i1 <false, +, true>for.body" which is just another description
315 // of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0".
317 // for (i = 0; i < N; i++)
318 // if (i & 1 != 0 /* == i % 2 */)
319 // /* do something */
321 // If we do not make the modulo explicit but only use the mechanism described
322 // above we will get the very restrictive assumption "N < 3", because for all
323 // values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap.
324 // Alternatively, we can make the modulo in the operand explicit in the
325 // resulting piecewise function and thereby avoid the assumption on N. For the
326 // example this would result in the following piecewise affine function:
327 // { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0;
328 // [i0] -> [(0)] : 2*floor((i0)/2) = i0 }
329 // To this end we can first determine if the (immediate) operand of the
330 // zero-extend can wrap and, in case it might, we will use explicit modulo
331 // semantic to compute the result instead of emitting non-wrapping
334 // Note that operands with large bit-widths are less likely to be negative
335 // because it would result in a very large access offset or loop bound after
336 // the zero-extend. To this end one can optimistically assume the operand to
337 // be positive and avoid the piecewise definition if the bit-width is bigger
338 // than some threshold (here MaxZextSmallBitWidth).
340 // We choose to go with a hybrid solution of all modeling techniques described
341 // above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the
342 // wrapping explicitly and use a piecewise defined function. However, if the
343 // bit-width is bigger than MaxZextSmallBitWidth we will employ overflow
344 // assumptions and assume the "former negative" piece will not exist.
346 auto *Op
= Expr
->getOperand();
347 auto OpPWAC
= visit(Op
);
349 // If the width is to big we assume the negative part does not occur.
350 if (!computeModuloForExpr(Op
)) {
351 takeNonNegativeAssumption(OpPWAC
);
355 // If the width is small build the piece for the non-negative part and
356 // the one for the negative part and unify them.
357 unsigned Width
= TD
.getTypeSizeInBits(Op
->getType());
358 interpretAsUnsigned(OpPWAC
, Width
);
362 PWACtx
SCEVAffinator::visitSignExtendExpr(const SCEVSignExtendExpr
*Expr
) {
363 // As all values are represented as signed, a sign extension is a noop.
364 return visit(Expr
->getOperand());
367 PWACtx
SCEVAffinator::visitAddExpr(const SCEVAddExpr
*Expr
) {
368 PWACtx Sum
= visit(Expr
->getOperand(0));
370 for (int i
= 1, e
= Expr
->getNumOperands(); i
< e
; ++i
) {
371 Sum
= combine(Sum
, visit(Expr
->getOperand(i
)), isl_pw_aff_add
);
372 if (isTooComplex(Sum
))
373 return std::make_pair(nullptr, nullptr);
379 PWACtx
SCEVAffinator::visitMulExpr(const SCEVMulExpr
*Expr
) {
380 PWACtx Prod
= visit(Expr
->getOperand(0));
382 for (int i
= 1, e
= Expr
->getNumOperands(); i
< e
; ++i
) {
383 Prod
= combine(Prod
, visit(Expr
->getOperand(i
)), isl_pw_aff_mul
);
384 if (isTooComplex(Prod
))
385 return std::make_pair(nullptr, nullptr);
391 PWACtx
SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr
*Expr
) {
392 assert(Expr
->isAffine() && "Only affine AddRecurrences allowed");
394 auto Flags
= Expr
->getNoWrapFlags();
396 // Directly generate isl_pw_aff for Expr if 'start' is zero.
397 if (Expr
->getStart()->isZero()) {
398 assert(S
->contains(Expr
->getLoop()) &&
399 "Scop does not contain the loop referenced in this AddRec");
401 PWACtx Step
= visit(Expr
->getOperand(1));
402 isl_space
*Space
= isl_space_set_alloc(Ctx
.get(), 0, NumIterators
);
403 isl_local_space
*LocalSpace
= isl_local_space_from_space(Space
);
405 unsigned loopDimension
= S
->getRelativeLoopDepth(Expr
->getLoop());
407 isl_aff
*LAff
= isl_aff_set_coefficient_si(
408 isl_aff_zero_on_domain(LocalSpace
), isl_dim_in
, loopDimension
, 1);
409 isl_pw_aff
*LPwAff
= isl_pw_aff_from_aff(LAff
);
411 Step
.first
= Step
.first
.mul(isl::manage(LPwAff
));
415 // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
416 // if 'start' is not zero.
417 // TODO: Using the original SCEV no-wrap flags is not always safe, however
418 // as our code generation is reordering the expression anyway it doesn't
420 const SCEV
*ZeroStartExpr
=
421 SE
.getAddRecExpr(SE
.getConstant(Expr
->getStart()->getType(), 0),
422 Expr
->getStepRecurrence(SE
), Expr
->getLoop(), Flags
);
424 PWACtx Result
= visit(ZeroStartExpr
);
425 PWACtx Start
= visit(Expr
->getStart());
426 Result
= combine(Result
, Start
, isl_pw_aff_add
);
430 PWACtx
SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr
*Expr
) {
431 PWACtx Max
= visit(Expr
->getOperand(0));
433 for (int i
= 1, e
= Expr
->getNumOperands(); i
< e
; ++i
) {
434 Max
= combine(Max
, visit(Expr
->getOperand(i
)), isl_pw_aff_max
);
435 if (isTooComplex(Max
))
436 return std::make_pair(nullptr, nullptr);
442 PWACtx
SCEVAffinator::visitUMaxExpr(const SCEVUMaxExpr
*Expr
) {
443 llvm_unreachable("SCEVUMaxExpr not yet supported");
446 PWACtx
SCEVAffinator::visitUDivExpr(const SCEVUDivExpr
*Expr
) {
447 // The handling of unsigned division is basically the same as for signed
448 // division, except the interpretation of the operands. As the divisor
449 // has to be constant in both cases we can simply interpret it as an
450 // unsigned value without additional complexity in the representation.
451 // For the dividend we could choose from the different representation
452 // schemes introduced for zero-extend operations but for now we will
453 // simply use an assumption.
454 auto *Dividend
= Expr
->getLHS();
455 auto *Divisor
= Expr
->getRHS();
456 assert(isa
<SCEVConstant
>(Divisor
) &&
457 "UDiv is no parameter but has a non-constant RHS.");
459 auto DividendPWAC
= visit(Dividend
);
460 auto DivisorPWAC
= visit(Divisor
);
462 if (SE
.isKnownNegative(Divisor
)) {
463 // Interpret negative divisors unsigned. This is a special case of the
464 // piece-wise defined value described for zero-extends as we already know
465 // the actual value of the constant divisor.
466 unsigned Width
= TD
.getTypeSizeInBits(Expr
->getType());
467 auto *DivisorDom
= DivisorPWAC
.first
.domain().take();
468 auto *WidthExpPWA
= getWidthExpValOnDomain(Width
, DivisorDom
);
469 DivisorPWAC
.first
= DivisorPWAC
.first
.add(isl::manage(WidthExpPWA
));
472 // TODO: One can represent the dividend as piece-wise function to be more
473 // precise but therefor a heuristic is needed.
475 // Assume a non-negative dividend.
476 takeNonNegativeAssumption(DividendPWAC
);
478 DividendPWAC
= combine(DividendPWAC
, DivisorPWAC
, isl_pw_aff_div
);
479 DividendPWAC
.first
= DividendPWAC
.first
.floor();
484 PWACtx
SCEVAffinator::visitSDivInstruction(Instruction
*SDiv
) {
485 assert(SDiv
->getOpcode() == Instruction::SDiv
&& "Assumed SDiv instruction!");
487 auto *Scope
= getScope();
488 auto *Divisor
= SDiv
->getOperand(1);
489 auto *DivisorSCEV
= SE
.getSCEVAtScope(Divisor
, Scope
);
490 auto DivisorPWAC
= visit(DivisorSCEV
);
491 assert(isa
<SCEVConstant
>(DivisorSCEV
) &&
492 "SDiv is no parameter but has a non-constant RHS.");
494 auto *Dividend
= SDiv
->getOperand(0);
495 auto *DividendSCEV
= SE
.getSCEVAtScope(Dividend
, Scope
);
496 auto DividendPWAC
= visit(DividendSCEV
);
497 DividendPWAC
= combine(DividendPWAC
, DivisorPWAC
, isl_pw_aff_tdiv_q
);
501 PWACtx
SCEVAffinator::visitSRemInstruction(Instruction
*SRem
) {
502 assert(SRem
->getOpcode() == Instruction::SRem
&& "Assumed SRem instruction!");
504 auto *Scope
= getScope();
505 auto *Divisor
= SRem
->getOperand(1);
506 auto *DivisorSCEV
= SE
.getSCEVAtScope(Divisor
, Scope
);
507 auto DivisorPWAC
= visit(DivisorSCEV
);
508 assert(isa
<ConstantInt
>(Divisor
) &&
509 "SRem is no parameter but has a non-constant RHS.");
511 auto *Dividend
= SRem
->getOperand(0);
512 auto *DividendSCEV
= SE
.getSCEVAtScope(Dividend
, Scope
);
513 auto DividendPWAC
= visit(DividendSCEV
);
514 DividendPWAC
= combine(DividendPWAC
, DivisorPWAC
, isl_pw_aff_tdiv_r
);
518 PWACtx
SCEVAffinator::visitUnknown(const SCEVUnknown
*Expr
) {
519 if (Instruction
*I
= dyn_cast
<Instruction
>(Expr
->getValue())) {
520 switch (I
->getOpcode()) {
521 case Instruction::IntToPtr
:
522 return visit(SE
.getSCEVAtScope(I
->getOperand(0), getScope()));
523 case Instruction::PtrToInt
:
524 return visit(SE
.getSCEVAtScope(I
->getOperand(0), getScope()));
525 case Instruction::SDiv
:
526 return visitSDivInstruction(I
);
527 case Instruction::SRem
:
528 return visitSRemInstruction(I
);
530 break; // Fall through.
535 "Unknowns SCEV was neither parameter nor a valid instruction.");