js/src/jit/ReciprocalMulConstants.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
   2  * vim: set ts=8 sts=2 et sw=2 tw=80:
   3  * This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "jit/ReciprocalMulConstants.h"
   8
   9 #include "mozilla/Assertions.h"
  10
  11 using namespace js::jit;
  12
  13 ReciprocalMulConstants ReciprocalMulConstants::computeDivisionConstants(
  14     uint32_t d, int maxLog) {
  15   MOZ_ASSERT(maxLog >= 2 && maxLog <= 32);
  16   // In what follows, 0 < d < 2^maxLog and d is not a power of 2.
  17   MOZ_ASSERT(d < (uint64_t(1) << maxLog) && (d & (d - 1)) != 0);
  18
  19   // Speeding up division by non power-of-2 constants is possible by
  20   // calculating, during compilation, a value M such that high-order
  21   // bits of M*n correspond to the result of the division of n by d.
  22   // No value of M can serve this purpose for arbitrarily big values
  23   // of n but, for optimizing integer division, we're just concerned
  24   // with values of n whose absolute value is bounded (by fitting in
  25   // an integer type, say). With this in mind, we'll find a constant
  26   // M as above that works for -2^maxLog <= n < 2^maxLog; maxLog can
  27   // then be 31 for signed division or 32 for unsigned division.
  28   //
  29   // The original presentation of this technique appears in Hacker's
  30   // Delight, a book by Henry S. Warren, Jr.. A proof of correctness
  31   // for our version follows; we'll denote maxLog by L in the proof,
  32   // for conciseness.
  33   //
  34   // Formally, for |d| < 2^L, we'll compute two magic values M and s
  35   // in the ranges 0 <= M < 2^(L+1) and 0 <= s <= L such that
  36   //     (M * n) >> (32 + s) = floor(n/d)    if    0 <= n < 2^L
  37   //     (M * n) >> (32 + s) = ceil(n/d) - 1 if -2^L <= n < 0.
  38   //
  39   // Define p = 32 + s, M = ceil(2^p/d), and assume that s satisfies
  40   //                     M - 2^p/d <= 2^(p-L)/d.                 (1)
  41   // (Observe that p = CeilLog32(d) + L satisfies this, as the right
  42   // side of (1) is at least one in this case). Then,
  43   //
  44   // a) If p <= CeilLog32(d) + L, then M < 2^(L+1) - 1.
  45   // Proof: Indeed, M is monotone in p and, for p equal to the above
  46   // value, the bounds 2^L > d >= 2^(p-L-1) + 1 readily imply that
  47   //    2^p / d <  2^p/(d - 1) * (d - 1)/d
  48   //            <= 2^(L+1) * (1 - 1/d) < 2^(L+1) - 2.
  49   // The claim follows by applying the ceiling function.
  50   //
  51   // b) For any 0 <= n < 2^L, floor(Mn/2^p) = floor(n/d).
  52   // Proof: Put x = floor(Mn/2^p); it's the unique integer for which
  53   //                    Mn/2^p - 1 < x <= Mn/2^p.                (2)
  54   // Using M >= 2^p/d on the LHS and (1) on the RHS, we get
  55   //           n/d - 1 < x <= n/d + n/(2^L d) < n/d + 1/d.
  56   // Since x is an integer, it's not in the interval (n/d, (n+1)/d),
  57   // and so n/d - 1 < x <= n/d, which implies x = floor(n/d).
  58   //
  59   // c) For any -2^L <= n < 0, floor(Mn/2^p) + 1 = ceil(n/d).
  60   // Proof: The proof is similar. Equation (2) holds as above. Using
  61   // M > 2^p/d (d isn't a power of 2) on the RHS and (1) on the LHS,
  62   //                 n/d + n/(2^L d) - 1 < x < n/d.
  63   // Using n >= -2^L and summing 1,
  64   //                  n/d - 1/d < x + 1 < n/d + 1.
  65   // Since x + 1 is an integer, this implies n/d <= x + 1 < n/d + 1.
  66   // In other words, x + 1 = ceil(n/d).
  67   //
  68   // Condition (1) isn't necessary for the existence of M and s with
  69   // the properties above. Hacker's Delight provides a slightly less
  70   // restrictive condition when d >= 196611, at the cost of a 3-page
  71   // proof of correctness, for the case L = 31.
  72   //
  73   // Note that, since d*M - 2^p = d - (2^p)%d, (1) can be written as
  74   //                   2^(p-L) >= d - (2^p)%d.
  75   // In order to avoid overflow in the (2^p) % d calculation, we can
  76   // compute it as (2^p-1) % d + 1, where 2^p-1 can then be computed
  77   // without overflow as UINT64_MAX >> (64-p).
  78
  79   // We now compute the least p >= 32 with the property above...
  80   int32_t p = 32;
  81   while ((uint64_t(1) << (p - maxLog)) + (UINT64_MAX >> (64 - p)) % d + 1 < d) {
  82     p++;
  83   }
  84
  85   // ...and the corresponding M. For either the signed (L=31) or the
  86   // unsigned (L=32) case, this value can be too large (cf. item a).
  87   // Codegen can still multiply by M by multiplying by (M - 2^L) and
  88   // adjusting the value afterwards, if this is the case.
  89   ReciprocalMulConstants rmc;
  90   rmc.multiplier = (UINT64_MAX >> (64 - p)) / d + 1;
  91   rmc.shiftAmount = p - 32;
  92
  93   return rmc;
  94 }