include/qemu/host-utils.h

   1 /*
   2  * Utility compute operations used by translated code.
   3  *
   4  * Copyright (c) 2007 Thiemo Seufer
   5  * Copyright (c) 2007 Jocelyn Mayer
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  */
  25
  26 /* Portions of this work are licensed under the terms of the GNU GPL,
  27  * version 2 or later. See the COPYING file in the top-level directory.
  28  */
  29
  30 #ifndef HOST_UTILS_H
  31 #define HOST_UTILS_H
  32
  33 #include "qemu/compiler.h"
  34 #include "qemu/bswap.h"
  35 #include "qemu/int128.h"
  36
  37 #ifdef CONFIG_INT128
  38 static inline void mulu64(uint64_t *plow, uint64_t *phigh,
  39                           uint64_t a, uint64_t b)
  40 {
  41     __uint128_t r = (__uint128_t)a * b;
  42     *plow = r;
  43     *phigh = r >> 64;
  44 }
  45
  46 static inline void muls64(uint64_t *plow, uint64_t *phigh,
  47                           int64_t a, int64_t b)
  48 {
  49     __int128_t r = (__int128_t)a * b;
  50     *plow = r;
  51     *phigh = r >> 64;
  52 }
  53
  54 /* compute with 96 bit intermediate result: (a*b)/c */
  55 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  56 {
  57     return (__int128_t)a * b / c;
  58 }
  59
  60 static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
  61                                uint64_t divisor)
  62 {
  63     __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
  64     __uint128_t result = dividend / divisor;
  65
  66     *plow = result;
  67     *phigh = result >> 64;
  68     return dividend % divisor;
  69 }
  70
  71 static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
  72                               int64_t divisor)
  73 {
  74     __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
  75     __int128_t result = dividend / divisor;
  76
  77     *plow = result;
  78     *phigh = result >> 64;
  79     return dividend % divisor;
  80 }
  81 #else
  82 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
  83 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
  84 uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
  85 int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
  86
  87 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  88 {
  89     union {
  90         uint64_t ll;
  91         struct {
  92 #if HOST_BIG_ENDIAN
  93             uint32_t high, low;
  94 #else
  95             uint32_t low, high;
  96 #endif
  97         } l;
  98     } u, res;
  99     uint64_t rl, rh;
 100
 101     u.ll = a;
 102     rl = (uint64_t)u.l.low * (uint64_t)b;
 103     rh = (uint64_t)u.l.high * (uint64_t)b;
 104     rh += (rl >> 32);
 105     res.l.high = rh / c;
 106     res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
 107     return res.ll;
 108 }
 109 #endif
 110
 111 /**
 112  * clz32 - count leading zeros in a 32-bit value.
 113  * @val: The value to search
 114  *
 115  * Returns 32 if the value is zero.  Note that the GCC builtin is
 116  * undefined if the value is zero.
 117  */
 118 static inline int clz32(uint32_t val)
 119 {
 120     return val ? __builtin_clz(val) : 32;
 121 }
 122
 123 /**
 124  * clo32 - count leading ones in a 32-bit value.
 125  * @val: The value to search
 126  *
 127  * Returns 32 if the value is -1.
 128  */
 129 static inline int clo32(uint32_t val)
 130 {
 131     return clz32(~val);
 132 }
 133
 134 /**
 135  * clz64 - count leading zeros in a 64-bit value.
 136  * @val: The value to search
 137  *
 138  * Returns 64 if the value is zero.  Note that the GCC builtin is
 139  * undefined if the value is zero.
 140  */
 141 static inline int clz64(uint64_t val)
 142 {
 143     return val ? __builtin_clzll(val) : 64;
 144 }
 145
 146 /**
 147  * clo64 - count leading ones in a 64-bit value.
 148  * @val: The value to search
 149  *
 150  * Returns 64 if the value is -1.
 151  */
 152 static inline int clo64(uint64_t val)
 153 {
 154     return clz64(~val);
 155 }
 156
 157 /**
 158  * ctz32 - count trailing zeros in a 32-bit value.
 159  * @val: The value to search
 160  *
 161  * Returns 32 if the value is zero.  Note that the GCC builtin is
 162  * undefined if the value is zero.
 163  */
 164 static inline int ctz32(uint32_t val)
 165 {
 166     return val ? __builtin_ctz(val) : 32;
 167 }
 168
 169 /**
 170  * cto32 - count trailing ones in a 32-bit value.
 171  * @val: The value to search
 172  *
 173  * Returns 32 if the value is -1.
 174  */
 175 static inline int cto32(uint32_t val)
 176 {
 177     return ctz32(~val);
 178 }
 179
 180 /**
 181  * ctz64 - count trailing zeros in a 64-bit value.
 182  * @val: The value to search
 183  *
 184  * Returns 64 if the value is zero.  Note that the GCC builtin is
 185  * undefined if the value is zero.
 186  */
 187 static inline int ctz64(uint64_t val)
 188 {
 189     return val ? __builtin_ctzll(val) : 64;
 190 }
 191
 192 /**
 193  * cto64 - count trailing ones in a 64-bit value.
 194  * @val: The value to search
 195  *
 196  * Returns 64 if the value is -1.
 197  */
 198 static inline int cto64(uint64_t val)
 199 {
 200     return ctz64(~val);
 201 }
 202
 203 /**
 204  * clrsb32 - count leading redundant sign bits in a 32-bit value.
 205  * @val: The value to search
 206  *
 207  * Returns the number of bits following the sign bit that are equal to it.
 208  * No special cases; output range is [0-31].
 209  */
 210 static inline int clrsb32(uint32_t val)
 211 {
 212 #if __has_builtin(__builtin_clrsb) || !defined(__clang__)
 213     return __builtin_clrsb(val);
 214 #else
 215     return clz32(val ^ ((int32_t)val >> 1)) - 1;
 216 #endif
 217 }
 218
 219 /**
 220  * clrsb64 - count leading redundant sign bits in a 64-bit value.
 221  * @val: The value to search
 222  *
 223  * Returns the number of bits following the sign bit that are equal to it.
 224  * No special cases; output range is [0-63].
 225  */
 226 static inline int clrsb64(uint64_t val)
 227 {
 228 #if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
 229     return __builtin_clrsbll(val);
 230 #else
 231     return clz64(val ^ ((int64_t)val >> 1)) - 1;
 232 #endif
 233 }
 234
 235 /**
 236  * ctpop8 - count the population of one bits in an 8-bit value.
 237  * @val: The value to search
 238  */
 239 static inline int ctpop8(uint8_t val)
 240 {
 241     return __builtin_popcount(val);
 242 }
 243
 244 /**
 245  * ctpop16 - count the population of one bits in a 16-bit value.
 246  * @val: The value to search
 247  */
 248 static inline int ctpop16(uint16_t val)
 249 {
 250     return __builtin_popcount(val);
 251 }
 252
 253 /**
 254  * ctpop32 - count the population of one bits in a 32-bit value.
 255  * @val: The value to search
 256  */
 257 static inline int ctpop32(uint32_t val)
 258 {
 259     return __builtin_popcount(val);
 260 }
 261
 262 /**
 263  * ctpop64 - count the population of one bits in a 64-bit value.
 264  * @val: The value to search
 265  */
 266 static inline int ctpop64(uint64_t val)
 267 {
 268     return __builtin_popcountll(val);
 269 }
 270
 271 /**
 272  * revbit8 - reverse the bits in an 8-bit value.
 273  * @x: The value to modify.
 274  */
 275 static inline uint8_t revbit8(uint8_t x)
 276 {
 277 #if __has_builtin(__builtin_bitreverse8)
 278     return __builtin_bitreverse8(x);
 279 #else
 280     /* Assign the correct nibble position.  */
 281     x = ((x & 0xf0) >> 4)
 282       | ((x & 0x0f) << 4);
 283     /* Assign the correct bit position.  */
 284     x = ((x & 0x88) >> 3)
 285       | ((x & 0x44) >> 1)
 286       | ((x & 0x22) << 1)
 287       | ((x & 0x11) << 3);
 288     return x;
 289 #endif
 290 }
 291
 292 /**
 293  * revbit16 - reverse the bits in a 16-bit value.
 294  * @x: The value to modify.
 295  */
 296 static inline uint16_t revbit16(uint16_t x)
 297 {
 298 #if __has_builtin(__builtin_bitreverse16)
 299     return __builtin_bitreverse16(x);
 300 #else
 301     /* Assign the correct byte position.  */
 302     x = bswap16(x);
 303     /* Assign the correct nibble position.  */
 304     x = ((x & 0xf0f0) >> 4)
 305       | ((x & 0x0f0f) << 4);
 306     /* Assign the correct bit position.  */
 307     x = ((x & 0x8888) >> 3)
 308       | ((x & 0x4444) >> 1)
 309       | ((x & 0x2222) << 1)
 310       | ((x & 0x1111) << 3);
 311     return x;
 312 #endif
 313 }
 314
 315 /**
 316  * revbit32 - reverse the bits in a 32-bit value.
 317  * @x: The value to modify.
 318  */
 319 static inline uint32_t revbit32(uint32_t x)
 320 {
 321 #if __has_builtin(__builtin_bitreverse32)
 322     return __builtin_bitreverse32(x);
 323 #else
 324     /* Assign the correct byte position.  */
 325     x = bswap32(x);
 326     /* Assign the correct nibble position.  */
 327     x = ((x & 0xf0f0f0f0u) >> 4)
 328       | ((x & 0x0f0f0f0fu) << 4);
 329     /* Assign the correct bit position.  */
 330     x = ((x & 0x88888888u) >> 3)
 331       | ((x & 0x44444444u) >> 1)
 332       | ((x & 0x22222222u) << 1)
 333       | ((x & 0x11111111u) << 3);
 334     return x;
 335 #endif
 336 }
 337
 338 /**
 339  * revbit64 - reverse the bits in a 64-bit value.
 340  * @x: The value to modify.
 341  */
 342 static inline uint64_t revbit64(uint64_t x)
 343 {
 344 #if __has_builtin(__builtin_bitreverse64)
 345     return __builtin_bitreverse64(x);
 346 #else
 347     /* Assign the correct byte position.  */
 348     x = bswap64(x);
 349     /* Assign the correct nibble position.  */
 350     x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
 351       | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
 352     /* Assign the correct bit position.  */
 353     x = ((x & 0x8888888888888888ull) >> 3)
 354       | ((x & 0x4444444444444444ull) >> 1)
 355       | ((x & 0x2222222222222222ull) << 1)
 356       | ((x & 0x1111111111111111ull) << 3);
 357     return x;
 358 #endif
 359 }
 360
 361 /**
 362  * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 363  */
 364 static inline uint64_t uabs64(int64_t v)
 365 {
 366     return v < 0 ? -v : v;
 367 }
 368
 369 /**
 370  * sadd32_overflow - addition with overflow indication
 371  * @x, @y: addends
 372  * @ret: Output for sum
 373  *
 374  * Computes *@ret = @x + @y, and returns true if and only if that
 375  * value has been truncated.
 376  */
 377 static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
 378 {
 379     return __builtin_add_overflow(x, y, ret);
 380 }
 381
 382 /**
 383  * sadd64_overflow - addition with overflow indication
 384  * @x, @y: addends
 385  * @ret: Output for sum
 386  *
 387  * Computes *@ret = @x + @y, and returns true if and only if that
 388  * value has been truncated.
 389  */
 390 static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
 391 {
 392     return __builtin_add_overflow(x, y, ret);
 393 }
 394
 395 /**
 396  * uadd32_overflow - addition with overflow indication
 397  * @x, @y: addends
 398  * @ret: Output for sum
 399  *
 400  * Computes *@ret = @x + @y, and returns true if and only if that
 401  * value has been truncated.
 402  */
 403 static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 404 {
 405     return __builtin_add_overflow(x, y, ret);
 406 }
 407
 408 /**
 409  * uadd64_overflow - addition with overflow indication
 410  * @x, @y: addends
 411  * @ret: Output for sum
 412  *
 413  * Computes *@ret = @x + @y, and returns true if and only if that
 414  * value has been truncated.
 415  */
 416 static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 417 {
 418     return __builtin_add_overflow(x, y, ret);
 419 }
 420
 421 /**
 422  * ssub32_overflow - subtraction with overflow indication
 423  * @x: Minuend
 424  * @y: Subtrahend
 425  * @ret: Output for difference
 426  *
 427  * Computes *@ret = @x - @y, and returns true if and only if that
 428  * value has been truncated.
 429  */
 430 static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
 431 {
 432     return __builtin_sub_overflow(x, y, ret);
 433 }
 434
 435 /**
 436  * ssub64_overflow - subtraction with overflow indication
 437  * @x: Minuend
 438  * @y: Subtrahend
 439  * @ret: Output for sum
 440  *
 441  * Computes *@ret = @x - @y, and returns true if and only if that
 442  * value has been truncated.
 443  */
 444 static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
 445 {
 446     return __builtin_sub_overflow(x, y, ret);
 447 }
 448
 449 /**
 450  * usub32_overflow - subtraction with overflow indication
 451  * @x: Minuend
 452  * @y: Subtrahend
 453  * @ret: Output for sum
 454  *
 455  * Computes *@ret = @x - @y, and returns true if and only if that
 456  * value has been truncated.
 457  */
 458 static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 459 {
 460     return __builtin_sub_overflow(x, y, ret);
 461 }
 462
 463 /**
 464  * usub64_overflow - subtraction with overflow indication
 465  * @x: Minuend
 466  * @y: Subtrahend
 467  * @ret: Output for sum
 468  *
 469  * Computes *@ret = @x - @y, and returns true if and only if that
 470  * value has been truncated.
 471  */
 472 static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 473 {
 474     return __builtin_sub_overflow(x, y, ret);
 475 }
 476
 477 /**
 478  * smul32_overflow - multiplication with overflow indication
 479  * @x, @y: Input multipliers
 480  * @ret: Output for product
 481  *
 482  * Computes *@ret = @x * @y, and returns true if and only if that
 483  * value has been truncated.
 484  */
 485 static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
 486 {
 487     return __builtin_mul_overflow(x, y, ret);
 488 }
 489
 490 /**
 491  * smul64_overflow - multiplication with overflow indication
 492  * @x, @y: Input multipliers
 493  * @ret: Output for product
 494  *
 495  * Computes *@ret = @x * @y, and returns true if and only if that
 496  * value has been truncated.
 497  */
 498 static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
 499 {
 500     return __builtin_mul_overflow(x, y, ret);
 501 }
 502
 503 /**
 504  * umul32_overflow - multiplication with overflow indication
 505  * @x, @y: Input multipliers
 506  * @ret: Output for product
 507  *
 508  * Computes *@ret = @x * @y, and returns true if and only if that
 509  * value has been truncated.
 510  */
 511 static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 512 {
 513     return __builtin_mul_overflow(x, y, ret);
 514 }
 515
 516 /**
 517  * umul64_overflow - multiplication with overflow indication
 518  * @x, @y: Input multipliers
 519  * @ret: Output for product
 520  *
 521  * Computes *@ret = @x * @y, and returns true if and only if that
 522  * value has been truncated.
 523  */
 524 static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 525 {
 526     return __builtin_mul_overflow(x, y, ret);
 527 }
 528
 529 /*
 530  * Unsigned 128x64 multiplication.
 531  * Returns true if the result got truncated to 128 bits.
 532  * Otherwise, returns false and the multiplication result via plow and phigh.
 533  */
 534 static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
 535 {
 536 #if defined(CONFIG_INT128) && \
 537     (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
 538     bool res;
 539     __uint128_t r;
 540     __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
 541     res = __builtin_mul_overflow(f, factor, &r);
 542
 543     *plow = r;
 544     *phigh = r >> 64;
 545
 546     return res;
 547 #else
 548     uint64_t dhi = *phigh;
 549     uint64_t dlo = *plow;
 550     uint64_t ahi;
 551     uint64_t blo, bhi;
 552
 553     if (dhi == 0) {
 554         mulu64(plow, phigh, dlo, factor);
 555         return false;
 556     }
 557
 558     mulu64(plow, &ahi, dlo, factor);
 559     mulu64(&blo, &bhi, dhi, factor);
 560
 561     return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
 562 #endif
 563 }
 564
 565 /**
 566  * uadd64_carry - addition with carry-in and carry-out
 567  * @x, @y: addends
 568  * @pcarry: in-out carry value
 569  *
 570  * Computes @x + @y + *@pcarry, placing the carry-out back
 571  * into *@pcarry and returning the 64-bit sum.
 572  */
 573 static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
 574 {
 575 #if __has_builtin(__builtin_addcll)
 576     unsigned long long c = *pcarry;
 577     x = __builtin_addcll(x, y, c, &c);
 578     *pcarry = c & 1;
 579     return x;
 580 #else
 581     bool c = *pcarry;
 582     /* This is clang's internal expansion of __builtin_addc. */
 583     c = uadd64_overflow(x, c, &x);
 584     c |= uadd64_overflow(x, y, &x);
 585     *pcarry = c;
 586     return x;
 587 #endif
 588 }
 589
 590 /**
 591  * usub64_borrow - subtraction with borrow-in and borrow-out
 592  * @x, @y: addends
 593  * @pborrow: in-out borrow value
 594  *
 595  * Computes @x - @y - *@pborrow, placing the borrow-out back
 596  * into *@pborrow and returning the 64-bit sum.
 597  */
 598 static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
 599 {
 600 #if __has_builtin(__builtin_subcll)
 601     unsigned long long b = *pborrow;
 602     x = __builtin_subcll(x, y, b, &b);
 603     *pborrow = b & 1;
 604     return x;
 605 #else
 606     bool b = *pborrow;
 607     b = usub64_overflow(x, b, &x);
 608     b |= usub64_overflow(x, y, &x);
 609     *pborrow = b;
 610     return x;
 611 #endif
 612 }
 613
 614 /* Host type specific sizes of these routines.  */
 615
 616 #if ULONG_MAX == UINT32_MAX
 617 # define clzl   clz32
 618 # define ctzl   ctz32
 619 # define clol   clo32
 620 # define ctol   cto32
 621 # define ctpopl ctpop32
 622 # define revbitl revbit32
 623 #elif ULONG_MAX == UINT64_MAX
 624 # define clzl   clz64
 625 # define ctzl   ctz64
 626 # define clol   clo64
 627 # define ctol   cto64
 628 # define ctpopl ctpop64
 629 # define revbitl revbit64
 630 #else
 631 # error Unknown sizeof long
 632 #endif
 633
 634 static inline bool is_power_of_2(uint64_t value)
 635 {
 636     if (!value) {
 637         return false;
 638     }
 639
 640     return !(value & (value - 1));
 641 }
 642
 643 /**
 644  * Return @value rounded down to the nearest power of two or zero.
 645  */
 646 static inline uint64_t pow2floor(uint64_t value)
 647 {
 648     if (!value) {
 649         /* Avoid undefined shift by 64 */
 650         return 0;
 651     }
 652     return 0x8000000000000000ull >> clz64(value);
 653 }
 654
 655 /*
 656  * Return @value rounded up to the nearest power of two modulo 2^64.
 657  * This is *zero* for @value > 2^63, so be careful.
 658  */
 659 static inline uint64_t pow2ceil(uint64_t value)
 660 {
 661     int n = clz64(value - 1);
 662
 663     if (!n) {
 664         /*
 665          * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
 666          * Therefore, either @value == 0 or @value > 2^63.
 667          * If it's 0, return 1, else return 0.
 668          */
 669         return !value;
 670     }
 671     return 0x8000000000000000ull >> (n - 1);
 672 }
 673
 674 static inline uint32_t pow2roundup32(uint32_t x)
 675 {
 676     x |= (x >> 1);
 677     x |= (x >> 2);
 678     x |= (x >> 4);
 679     x |= (x >> 8);
 680     x |= (x >> 16);
 681     return x + 1;
 682 }
 683
 684 /**
 685  * urshift - 128-bit Unsigned Right Shift.
 686  * @plow: in/out - lower 64-bit integer.
 687  * @phigh: in/out - higher 64-bit integer.
 688  * @shift: in - bytes to shift, between 0 and 127.
 689  *
 690  * Result is zero-extended and stored in plow/phigh, which are
 691  * input/output variables. Shift values outside the range will
 692  * be mod to 128. In other words, the caller is responsible to
 693  * verify/assert both the shift range and plow/phigh pointers.
 694  */
 695 void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
 696
 697 /**
 698  * ulshift - 128-bit Unsigned Left Shift.
 699  * @plow: in/out - lower 64-bit integer.
 700  * @phigh: in/out - higher 64-bit integer.
 701  * @shift: in - bytes to shift, between 0 and 127.
 702  * @overflow: out - true if any 1-bit is shifted out.
 703  *
 704  * Result is zero-extended and stored in plow/phigh, which are
 705  * input/output variables. Shift values outside the range will
 706  * be mod to 128. In other words, the caller is responsible to
 707  * verify/assert both the shift range and plow/phigh pointers.
 708  */
 709 void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
 710
 711 /* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 712  * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 713  *
 714  * Licensed under the GPLv2/LGPLv3
 715  */
 716 static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
 717                                   uint64_t n0, uint64_t d)
 718 {
 719 #if defined(__x86_64__)
 720     uint64_t q;
 721     asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
 722     return q;
 723 #elif defined(__s390x__) && !defined(__clang__)
 724     /* Need to use a TImode type to get an even register pair for DLGR.  */
 725     unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
 726     asm("dlgr %0, %1" : "+r"(n) : "r"(d));
 727     *r = n >> 64;
 728     return n;
 729 #elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
 730     /* From Power ISA 2.06, programming note for divdeu.  */
 731     uint64_t q1, q2, Q, r1, r2, R;
 732     asm("divdeu %0,%2,%4; divdu %1,%3,%4"
 733         : "=&r"(q1), "=r"(q2)
 734         : "r"(n1), "r"(n0), "r"(d));
 735     r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
 736     r2 = n0 - (q2 * d);
 737     Q = q1 + q2;
 738     R = r1 + r2;
 739     if (R >= d || R < r2) { /* overflow implies R > d */
 740         Q += 1;
 741         R -= d;
 742     }
 743     *r = R;
 744     return Q;
 745 #else
 746     uint64_t d0, d1, q0, q1, r1, r0, m;
 747
 748     d0 = (uint32_t)d;
 749     d1 = d >> 32;
 750
 751     r1 = n1 % d1;
 752     q1 = n1 / d1;
 753     m = q1 * d0;
 754     r1 = (r1 << 32) | (n0 >> 32);
 755     if (r1 < m) {
 756         q1 -= 1;
 757         r1 += d;
 758         if (r1 >= d) {
 759             if (r1 < m) {
 760                 q1 -= 1;
 761                 r1 += d;
 762             }
 763         }
 764     }
 765     r1 -= m;
 766
 767     r0 = r1 % d1;
 768     q0 = r1 / d1;
 769     m = q0 * d0;
 770     r0 = (r0 << 32) | (uint32_t)n0;
 771     if (r0 < m) {
 772         q0 -= 1;
 773         r0 += d;
 774         if (r0 >= d) {
 775             if (r0 < m) {
 776                 q0 -= 1;
 777                 r0 += d;
 778             }
 779         }
 780     }
 781     r0 -= m;
 782
 783     *r = r0;
 784     return (q1 << 32) | q0;
 785 #endif
 786 }
 787
 788 Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
 789 Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
 790 #endif