include/qemu/host-utils.h

   1 /*
   2  * Utility compute operations used by translated code.
   3  *
   4  * Copyright (c) 2007 Thiemo Seufer
   5  * Copyright (c) 2007 Jocelyn Mayer
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  */
  25
  26 /* Portions of this work are licensed under the terms of the GNU GPL,
  27  * version 2 or later. See the COPYING file in the top-level directory.
  28  */
  29
  30 #ifndef HOST_UTILS_H
  31 #define HOST_UTILS_H
  32
  33 #include "qemu/compiler.h"
  34 #include "qemu/bswap.h"
  35
  36 #ifdef CONFIG_INT128
  37 static inline void mulu64(uint64_t *plow, uint64_t *phigh,
  38                           uint64_t a, uint64_t b)
  39 {
  40     __uint128_t r = (__uint128_t)a * b;
  41     *plow = r;
  42     *phigh = r >> 64;
  43 }
  44
  45 static inline void muls64(uint64_t *plow, uint64_t *phigh,
  46                           int64_t a, int64_t b)
  47 {
  48     __int128_t r = (__int128_t)a * b;
  49     *plow = r;
  50     *phigh = r >> 64;
  51 }
  52
  53 /* compute with 96 bit intermediate result: (a*b)/c */
  54 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  55 {
  56     return (__int128_t)a * b / c;
  57 }
  58
  59 static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
  60                                uint64_t divisor)
  61 {
  62     __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
  63     __uint128_t result = dividend / divisor;
  64
  65     *plow = result;
  66     *phigh = result >> 64;
  67     return dividend % divisor;
  68 }
  69
  70 static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
  71                               int64_t divisor)
  72 {
  73     __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
  74     __int128_t result = dividend / divisor;
  75
  76     *plow = result;
  77     *phigh = result >> 64;
  78     return dividend % divisor;
  79 }
  80 #else
  81 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
  82 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
  83 uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
  84 int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
  85
  86 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  87 {
  88     union {
  89         uint64_t ll;
  90         struct {
  91 #ifdef HOST_WORDS_BIGENDIAN
  92             uint32_t high, low;
  93 #else
  94             uint32_t low, high;
  95 #endif
  96         } l;
  97     } u, res;
  98     uint64_t rl, rh;
  99
 100     u.ll = a;
 101     rl = (uint64_t)u.l.low * (uint64_t)b;
 102     rh = (uint64_t)u.l.high * (uint64_t)b;
 103     rh += (rl >> 32);
 104     res.l.high = rh / c;
 105     res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
 106     return res.ll;
 107 }
 108 #endif
 109
 110 /**
 111  * clz32 - count leading zeros in a 32-bit value.
 112  * @val: The value to search
 113  *
 114  * Returns 32 if the value is zero.  Note that the GCC builtin is
 115  * undefined if the value is zero.
 116  */
 117 static inline int clz32(uint32_t val)
 118 {
 119     return val ? __builtin_clz(val) : 32;
 120 }
 121
 122 /**
 123  * clo32 - count leading ones in a 32-bit value.
 124  * @val: The value to search
 125  *
 126  * Returns 32 if the value is -1.
 127  */
 128 static inline int clo32(uint32_t val)
 129 {
 130     return clz32(~val);
 131 }
 132
 133 /**
 134  * clz64 - count leading zeros in a 64-bit value.
 135  * @val: The value to search
 136  *
 137  * Returns 64 if the value is zero.  Note that the GCC builtin is
 138  * undefined if the value is zero.
 139  */
 140 static inline int clz64(uint64_t val)
 141 {
 142     return val ? __builtin_clzll(val) : 64;
 143 }
 144
 145 /**
 146  * clo64 - count leading ones in a 64-bit value.
 147  * @val: The value to search
 148  *
 149  * Returns 64 if the value is -1.
 150  */
 151 static inline int clo64(uint64_t val)
 152 {
 153     return clz64(~val);
 154 }
 155
 156 /**
 157  * ctz32 - count trailing zeros in a 32-bit value.
 158  * @val: The value to search
 159  *
 160  * Returns 32 if the value is zero.  Note that the GCC builtin is
 161  * undefined if the value is zero.
 162  */
 163 static inline int ctz32(uint32_t val)
 164 {
 165     return val ? __builtin_ctz(val) : 32;
 166 }
 167
 168 /**
 169  * cto32 - count trailing ones in a 32-bit value.
 170  * @val: The value to search
 171  *
 172  * Returns 32 if the value is -1.
 173  */
 174 static inline int cto32(uint32_t val)
 175 {
 176     return ctz32(~val);
 177 }
 178
 179 /**
 180  * ctz64 - count trailing zeros in a 64-bit value.
 181  * @val: The value to search
 182  *
 183  * Returns 64 if the value is zero.  Note that the GCC builtin is
 184  * undefined if the value is zero.
 185  */
 186 static inline int ctz64(uint64_t val)
 187 {
 188     return val ? __builtin_ctzll(val) : 64;
 189 }
 190
 191 /**
 192  * cto64 - count trailing ones in a 64-bit value.
 193  * @val: The value to search
 194  *
 195  * Returns 64 if the value is -1.
 196  */
 197 static inline int cto64(uint64_t val)
 198 {
 199     return ctz64(~val);
 200 }
 201
 202 /**
 203  * clrsb32 - count leading redundant sign bits in a 32-bit value.
 204  * @val: The value to search
 205  *
 206  * Returns the number of bits following the sign bit that are equal to it.
 207  * No special cases; output range is [0-31].
 208  */
 209 static inline int clrsb32(uint32_t val)
 210 {
 211 #if __has_builtin(__builtin_clrsb) || !defined(__clang__)
 212     return __builtin_clrsb(val);
 213 #else
 214     return clz32(val ^ ((int32_t)val >> 1)) - 1;
 215 #endif
 216 }
 217
 218 /**
 219  * clrsb64 - count leading redundant sign bits in a 64-bit value.
 220  * @val: The value to search
 221  *
 222  * Returns the number of bits following the sign bit that are equal to it.
 223  * No special cases; output range is [0-63].
 224  */
 225 static inline int clrsb64(uint64_t val)
 226 {
 227 #if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
 228     return __builtin_clrsbll(val);
 229 #else
 230     return clz64(val ^ ((int64_t)val >> 1)) - 1;
 231 #endif
 232 }
 233
 234 /**
 235  * ctpop8 - count the population of one bits in an 8-bit value.
 236  * @val: The value to search
 237  */
 238 static inline int ctpop8(uint8_t val)
 239 {
 240     return __builtin_popcount(val);
 241 }
 242
 243 /**
 244  * ctpop16 - count the population of one bits in a 16-bit value.
 245  * @val: The value to search
 246  */
 247 static inline int ctpop16(uint16_t val)
 248 {
 249     return __builtin_popcount(val);
 250 }
 251
 252 /**
 253  * ctpop32 - count the population of one bits in a 32-bit value.
 254  * @val: The value to search
 255  */
 256 static inline int ctpop32(uint32_t val)
 257 {
 258     return __builtin_popcount(val);
 259 }
 260
 261 /**
 262  * ctpop64 - count the population of one bits in a 64-bit value.
 263  * @val: The value to search
 264  */
 265 static inline int ctpop64(uint64_t val)
 266 {
 267     return __builtin_popcountll(val);
 268 }
 269
 270 /**
 271  * revbit8 - reverse the bits in an 8-bit value.
 272  * @x: The value to modify.
 273  */
 274 static inline uint8_t revbit8(uint8_t x)
 275 {
 276 #if __has_builtin(__builtin_bitreverse8)
 277     return __builtin_bitreverse8(x);
 278 #else
 279     /* Assign the correct nibble position.  */
 280     x = ((x & 0xf0) >> 4)
 281       | ((x & 0x0f) << 4);
 282     /* Assign the correct bit position.  */
 283     x = ((x & 0x88) >> 3)
 284       | ((x & 0x44) >> 1)
 285       | ((x & 0x22) << 1)
 286       | ((x & 0x11) << 3);
 287     return x;
 288 #endif
 289 }
 290
 291 /**
 292  * revbit16 - reverse the bits in a 16-bit value.
 293  * @x: The value to modify.
 294  */
 295 static inline uint16_t revbit16(uint16_t x)
 296 {
 297 #if __has_builtin(__builtin_bitreverse16)
 298     return __builtin_bitreverse16(x);
 299 #else
 300     /* Assign the correct byte position.  */
 301     x = bswap16(x);
 302     /* Assign the correct nibble position.  */
 303     x = ((x & 0xf0f0) >> 4)
 304       | ((x & 0x0f0f) << 4);
 305     /* Assign the correct bit position.  */
 306     x = ((x & 0x8888) >> 3)
 307       | ((x & 0x4444) >> 1)
 308       | ((x & 0x2222) << 1)
 309       | ((x & 0x1111) << 3);
 310     return x;
 311 #endif
 312 }
 313
 314 /**
 315  * revbit32 - reverse the bits in a 32-bit value.
 316  * @x: The value to modify.
 317  */
 318 static inline uint32_t revbit32(uint32_t x)
 319 {
 320 #if __has_builtin(__builtin_bitreverse32)
 321     return __builtin_bitreverse32(x);
 322 #else
 323     /* Assign the correct byte position.  */
 324     x = bswap32(x);
 325     /* Assign the correct nibble position.  */
 326     x = ((x & 0xf0f0f0f0u) >> 4)
 327       | ((x & 0x0f0f0f0fu) << 4);
 328     /* Assign the correct bit position.  */
 329     x = ((x & 0x88888888u) >> 3)
 330       | ((x & 0x44444444u) >> 1)
 331       | ((x & 0x22222222u) << 1)
 332       | ((x & 0x11111111u) << 3);
 333     return x;
 334 #endif
 335 }
 336
 337 /**
 338  * revbit64 - reverse the bits in a 64-bit value.
 339  * @x: The value to modify.
 340  */
 341 static inline uint64_t revbit64(uint64_t x)
 342 {
 343 #if __has_builtin(__builtin_bitreverse64)
 344     return __builtin_bitreverse64(x);
 345 #else
 346     /* Assign the correct byte position.  */
 347     x = bswap64(x);
 348     /* Assign the correct nibble position.  */
 349     x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
 350       | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
 351     /* Assign the correct bit position.  */
 352     x = ((x & 0x8888888888888888ull) >> 3)
 353       | ((x & 0x4444444444444444ull) >> 1)
 354       | ((x & 0x2222222222222222ull) << 1)
 355       | ((x & 0x1111111111111111ull) << 3);
 356     return x;
 357 #endif
 358 }
 359
 360 /**
 361  * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 362  */
 363 static inline uint64_t uabs64(int64_t v)
 364 {
 365     return v < 0 ? -v : v;
 366 }
 367
 368 /**
 369  * sadd32_overflow - addition with overflow indication
 370  * @x, @y: addends
 371  * @ret: Output for sum
 372  *
 373  * Computes *@ret = @x + @y, and returns true if and only if that
 374  * value has been truncated.
 375  */
 376 static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
 377 {
 378 #if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
 379     return __builtin_add_overflow(x, y, ret);
 380 #else
 381     *ret = x + y;
 382     return ((*ret ^ x) & ~(x ^ y)) < 0;
 383 #endif
 384 }
 385
 386 /**
 387  * sadd64_overflow - addition with overflow indication
 388  * @x, @y: addends
 389  * @ret: Output for sum
 390  *
 391  * Computes *@ret = @x + @y, and returns true if and only if that
 392  * value has been truncated.
 393  */
 394 static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
 395 {
 396 #if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
 397     return __builtin_add_overflow(x, y, ret);
 398 #else
 399     *ret = x + y;
 400     return ((*ret ^ x) & ~(x ^ y)) < 0;
 401 #endif
 402 }
 403
 404 /**
 405  * uadd32_overflow - addition with overflow indication
 406  * @x, @y: addends
 407  * @ret: Output for sum
 408  *
 409  * Computes *@ret = @x + @y, and returns true if and only if that
 410  * value has been truncated.
 411  */
 412 static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 413 {
 414 #if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
 415     return __builtin_add_overflow(x, y, ret);
 416 #else
 417     *ret = x + y;
 418     return *ret < x;
 419 #endif
 420 }
 421
 422 /**
 423  * uadd64_overflow - addition with overflow indication
 424  * @x, @y: addends
 425  * @ret: Output for sum
 426  *
 427  * Computes *@ret = @x + @y, and returns true if and only if that
 428  * value has been truncated.
 429  */
 430 static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 431 {
 432 #if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
 433     return __builtin_add_overflow(x, y, ret);
 434 #else
 435     *ret = x + y;
 436     return *ret < x;
 437 #endif
 438 }
 439
 440 /**
 441  * ssub32_overflow - subtraction with overflow indication
 442  * @x: Minuend
 443  * @y: Subtrahend
 444  * @ret: Output for difference
 445  *
 446  * Computes *@ret = @x - @y, and returns true if and only if that
 447  * value has been truncated.
 448  */
 449 static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
 450 {
 451 #if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
 452     return __builtin_sub_overflow(x, y, ret);
 453 #else
 454     *ret = x - y;
 455     return ((*ret ^ x) & (x ^ y)) < 0;
 456 #endif
 457 }
 458
 459 /**
 460  * ssub64_overflow - subtraction with overflow indication
 461  * @x: Minuend
 462  * @y: Subtrahend
 463  * @ret: Output for sum
 464  *
 465  * Computes *@ret = @x - @y, and returns true if and only if that
 466  * value has been truncated.
 467  */
 468 static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
 469 {
 470 #if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
 471     return __builtin_sub_overflow(x, y, ret);
 472 #else
 473     *ret = x - y;
 474     return ((*ret ^ x) & (x ^ y)) < 0;
 475 #endif
 476 }
 477
 478 /**
 479  * usub32_overflow - subtraction with overflow indication
 480  * @x: Minuend
 481  * @y: Subtrahend
 482  * @ret: Output for sum
 483  *
 484  * Computes *@ret = @x - @y, and returns true if and only if that
 485  * value has been truncated.
 486  */
 487 static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 488 {
 489 #if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
 490     return __builtin_sub_overflow(x, y, ret);
 491 #else
 492     *ret = x - y;
 493     return x < y;
 494 #endif
 495 }
 496
 497 /**
 498  * usub64_overflow - subtraction with overflow indication
 499  * @x: Minuend
 500  * @y: Subtrahend
 501  * @ret: Output for sum
 502  *
 503  * Computes *@ret = @x - @y, and returns true if and only if that
 504  * value has been truncated.
 505  */
 506 static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 507 {
 508 #if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
 509     return __builtin_sub_overflow(x, y, ret);
 510 #else
 511     *ret = x - y;
 512     return x < y;
 513 #endif
 514 }
 515
 516 /**
 517  * smul32_overflow - multiplication with overflow indication
 518  * @x, @y: Input multipliers
 519  * @ret: Output for product
 520  *
 521  * Computes *@ret = @x * @y, and returns true if and only if that
 522  * value has been truncated.
 523  */
 524 static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
 525 {
 526 #if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
 527     return __builtin_mul_overflow(x, y, ret);
 528 #else
 529     int64_t z = (int64_t)x * y;
 530     *ret = z;
 531     return *ret != z;
 532 #endif
 533 }
 534
 535 /**
 536  * smul64_overflow - multiplication with overflow indication
 537  * @x, @y: Input multipliers
 538  * @ret: Output for product
 539  *
 540  * Computes *@ret = @x * @y, and returns true if and only if that
 541  * value has been truncated.
 542  */
 543 static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
 544 {
 545 #if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
 546     return __builtin_mul_overflow(x, y, ret);
 547 #else
 548     uint64_t hi, lo;
 549     muls64(&lo, &hi, x, y);
 550     *ret = lo;
 551     return hi != ((int64_t)lo >> 63);
 552 #endif
 553 }
 554
 555 /**
 556  * umul32_overflow - multiplication with overflow indication
 557  * @x, @y: Input multipliers
 558  * @ret: Output for product
 559  *
 560  * Computes *@ret = @x * @y, and returns true if and only if that
 561  * value has been truncated.
 562  */
 563 static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 564 {
 565 #if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
 566     return __builtin_mul_overflow(x, y, ret);
 567 #else
 568     uint64_t z = (uint64_t)x * y;
 569     *ret = z;
 570     return z > UINT32_MAX;
 571 #endif
 572 }
 573
 574 /**
 575  * umul64_overflow - multiplication with overflow indication
 576  * @x, @y: Input multipliers
 577  * @ret: Output for product
 578  *
 579  * Computes *@ret = @x * @y, and returns true if and only if that
 580  * value has been truncated.
 581  */
 582 static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 583 {
 584 #if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
 585     return __builtin_mul_overflow(x, y, ret);
 586 #else
 587     uint64_t hi;
 588     mulu64(ret, &hi, x, y);
 589     return hi != 0;
 590 #endif
 591 }
 592
 593 /*
 594  * Unsigned 128x64 multiplication.
 595  * Returns true if the result got truncated to 128 bits.
 596  * Otherwise, returns false and the multiplication result via plow and phigh.
 597  */
 598 static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
 599 {
 600 #if defined(CONFIG_INT128) && \
 601     (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
 602     bool res;
 603     __uint128_t r;
 604     __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
 605     res = __builtin_mul_overflow(f, factor, &r);
 606
 607     *plow = r;
 608     *phigh = r >> 64;
 609
 610     return res;
 611 #else
 612     uint64_t dhi = *phigh;
 613     uint64_t dlo = *plow;
 614     uint64_t ahi;
 615     uint64_t blo, bhi;
 616
 617     if (dhi == 0) {
 618         mulu64(plow, phigh, dlo, factor);
 619         return false;
 620     }
 621
 622     mulu64(plow, &ahi, dlo, factor);
 623     mulu64(&blo, &bhi, dhi, factor);
 624
 625     return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
 626 #endif
 627 }
 628
 629 /**
 630  * uadd64_carry - addition with carry-in and carry-out
 631  * @x, @y: addends
 632  * @pcarry: in-out carry value
 633  *
 634  * Computes @x + @y + *@pcarry, placing the carry-out back
 635  * into *@pcarry and returning the 64-bit sum.
 636  */
 637 static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
 638 {
 639 #if __has_builtin(__builtin_addcll)
 640     unsigned long long c = *pcarry;
 641     x = __builtin_addcll(x, y, c, &c);
 642     *pcarry = c & 1;
 643     return x;
 644 #else
 645     bool c = *pcarry;
 646     /* This is clang's internal expansion of __builtin_addc. */
 647     c = uadd64_overflow(x, c, &x);
 648     c |= uadd64_overflow(x, y, &x);
 649     *pcarry = c;
 650     return x;
 651 #endif
 652 }
 653
 654 /**
 655  * usub64_borrow - subtraction with borrow-in and borrow-out
 656  * @x, @y: addends
 657  * @pborrow: in-out borrow value
 658  *
 659  * Computes @x - @y - *@pborrow, placing the borrow-out back
 660  * into *@pborrow and returning the 64-bit sum.
 661  */
 662 static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
 663 {
 664 #if __has_builtin(__builtin_subcll)
 665     unsigned long long b = *pborrow;
 666     x = __builtin_subcll(x, y, b, &b);
 667     *pborrow = b & 1;
 668     return x;
 669 #else
 670     bool b = *pborrow;
 671     b = usub64_overflow(x, b, &x);
 672     b |= usub64_overflow(x, y, &x);
 673     *pborrow = b;
 674     return x;
 675 #endif
 676 }
 677
 678 /* Host type specific sizes of these routines.  */
 679
 680 #if ULONG_MAX == UINT32_MAX
 681 # define clzl   clz32
 682 # define ctzl   ctz32
 683 # define clol   clo32
 684 # define ctol   cto32
 685 # define ctpopl ctpop32
 686 # define revbitl revbit32
 687 #elif ULONG_MAX == UINT64_MAX
 688 # define clzl   clz64
 689 # define ctzl   ctz64
 690 # define clol   clo64
 691 # define ctol   cto64
 692 # define ctpopl ctpop64
 693 # define revbitl revbit64
 694 #else
 695 # error Unknown sizeof long
 696 #endif
 697
 698 static inline bool is_power_of_2(uint64_t value)
 699 {
 700     if (!value) {
 701         return false;
 702     }
 703
 704     return !(value & (value - 1));
 705 }
 706
 707 /**
 708  * Return @value rounded down to the nearest power of two or zero.
 709  */
 710 static inline uint64_t pow2floor(uint64_t value)
 711 {
 712     if (!value) {
 713         /* Avoid undefined shift by 64 */
 714         return 0;
 715     }
 716     return 0x8000000000000000ull >> clz64(value);
 717 }
 718
 719 /*
 720  * Return @value rounded up to the nearest power of two modulo 2^64.
 721  * This is *zero* for @value > 2^63, so be careful.
 722  */
 723 static inline uint64_t pow2ceil(uint64_t value)
 724 {
 725     int n = clz64(value - 1);
 726
 727     if (!n) {
 728         /*
 729          * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
 730          * Therefore, either @value == 0 or @value > 2^63.
 731          * If it's 0, return 1, else return 0.
 732          */
 733         return !value;
 734     }
 735     return 0x8000000000000000ull >> (n - 1);
 736 }
 737
 738 static inline uint32_t pow2roundup32(uint32_t x)
 739 {
 740     x |= (x >> 1);
 741     x |= (x >> 2);
 742     x |= (x >> 4);
 743     x |= (x >> 8);
 744     x |= (x >> 16);
 745     return x + 1;
 746 }
 747
 748 /**
 749  * urshift - 128-bit Unsigned Right Shift.
 750  * @plow: in/out - lower 64-bit integer.
 751  * @phigh: in/out - higher 64-bit integer.
 752  * @shift: in - bytes to shift, between 0 and 127.
 753  *
 754  * Result is zero-extended and stored in plow/phigh, which are
 755  * input/output variables. Shift values outside the range will
 756  * be mod to 128. In other words, the caller is responsible to
 757  * verify/assert both the shift range and plow/phigh pointers.
 758  */
 759 void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
 760
 761 /**
 762  * ulshift - 128-bit Unsigned Left Shift.
 763  * @plow: in/out - lower 64-bit integer.
 764  * @phigh: in/out - higher 64-bit integer.
 765  * @shift: in - bytes to shift, between 0 and 127.
 766  * @overflow: out - true if any 1-bit is shifted out.
 767  *
 768  * Result is zero-extended and stored in plow/phigh, which are
 769  * input/output variables. Shift values outside the range will
 770  * be mod to 128. In other words, the caller is responsible to
 771  * verify/assert both the shift range and plow/phigh pointers.
 772  */
 773 void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
 774
 775 /* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 776  * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 777  *
 778  * Licensed under the GPLv2/LGPLv3
 779  */
 780 static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
 781                                   uint64_t n0, uint64_t d)
 782 {
 783 #if defined(__x86_64__)
 784     uint64_t q;
 785     asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
 786     return q;
 787 #elif defined(__s390x__) && !defined(__clang__)
 788     /* Need to use a TImode type to get an even register pair for DLGR.  */
 789     unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
 790     asm("dlgr %0, %1" : "+r"(n) : "r"(d));
 791     *r = n >> 64;
 792     return n;
 793 #elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
 794     /* From Power ISA 2.06, programming note for divdeu.  */
 795     uint64_t q1, q2, Q, r1, r2, R;
 796     asm("divdeu %0,%2,%4; divdu %1,%3,%4"
 797         : "=&r"(q1), "=r"(q2)
 798         : "r"(n1), "r"(n0), "r"(d));
 799     r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
 800     r2 = n0 - (q2 * d);
 801     Q = q1 + q2;
 802     R = r1 + r2;
 803     if (R >= d || R < r2) { /* overflow implies R > d */
 804         Q += 1;
 805         R -= d;
 806     }
 807     *r = R;
 808     return Q;
 809 #else
 810     uint64_t d0, d1, q0, q1, r1, r0, m;
 811
 812     d0 = (uint32_t)d;
 813     d1 = d >> 32;
 814
 815     r1 = n1 % d1;
 816     q1 = n1 / d1;
 817     m = q1 * d0;
 818     r1 = (r1 << 32) | (n0 >> 32);
 819     if (r1 < m) {
 820         q1 -= 1;
 821         r1 += d;
 822         if (r1 >= d) {
 823             if (r1 < m) {
 824                 q1 -= 1;
 825                 r1 += d;
 826             }
 827         }
 828     }
 829     r1 -= m;
 830
 831     r0 = r1 % d1;
 832     q0 = r1 / d1;
 833     m = q0 * d0;
 834     r0 = (r0 << 32) | (uint32_t)n0;
 835     if (r0 < m) {
 836         q0 -= 1;
 837         r0 += d;
 838         if (r0 >= d) {
 839             if (r0 < m) {
 840                 q0 -= 1;
 841                 r0 += d;
 842             }
 843         }
 844     }
 845     r0 -= m;
 846
 847     *r = r0;
 848     return (q1 << 32) | q0;
 849 #endif
 850 }
 851
 852 #endif